# imports used by the test fixtures below
import io
import os
import pickle

from rdkit import RDConfig
from rdkit.ML import BuildComposite


def setUp(self):
  self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data')
  self.dbName = RDConfig.RDTestDatabase
  self.details = BuildComposite.SetDefaults()
  self.details.dbName = self.dbName
  self.details.dbUser = RDConfig.defaultDBUser
  self.details.dbPassword = RDConfig.defaultDBPassword
def test1(self):
  """ basics """
  self.details.tableName = 'ferro_quant'
  refComposName = 'ferromag_quant_10.pkl'
  refCompos = pickle.load(open(os.path.join(self.baseDir, refComposName), 'rb'))
  # first make sure the data are intact
  self._init(refCompos)
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test7(self):
  """ Test composite of naive bayes"""
  self.details.tableName = 'ferro_noquant'
  refComposName = 'ferromag_NaiveBayes.pkl'
  pklFile = open(os.path.join(self.baseDir, refComposName), 'rb')
  refCompos = pickle.load(pklFile)
  self._init(refCompos, copyBounds=1)
  self.details.useTrees = 0
  self.details.useNaiveBayes = 1
  self.details.mEstimateVal = 20.0
  self.details.qBounds = [0] + [2] * 6 + [0]
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test6(self):
  """ auto bounds with a real valued activity"""
  self.details.tableName = 'ferro_noquant_realact'
  refComposName = 'ferromag_auto_10_3.pkl'
  refCompos = pickle.load(open(os.path.join(self.baseDir, refComposName), 'rb'))
  # first make sure the data are intact
  self._init(refCompos, copyBounds=1)
  self.details.limitDepth = 3
  self.details.nModels = 10
  self.details.activityBounds = [0.5]
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test4(self):
  """ more trees """
  self.details.tableName = 'ferro_quant'
  refComposName = 'ferromag_quant_50_3.pkl'
  refCompos = pickle.load(open(os.path.join(self.baseDir, refComposName), 'rb'))
  # first make sure the data are intact
  self._init(refCompos)
  self.details.limitDepth = 3
  self.details.nModels = 50
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test3(self):
  """ depth limit + less greedy """
  self.details.tableName = 'ferro_quant'
  refComposName = 'ferromag_quant_10_3_lessgreedy.pkl'
  refCompos = pickle.load(open(os.path.join(self.baseDir, refComposName), 'rb'))
  # first make sure the data are intact
  self._init(refCompos)
  self.details.limitDepth = 3
  self.details.lessGreedy = 1
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test7(self):
  """ Test composite of naive bayes"""
  self.details.tableName = 'ferro_noquant'
  refComposName = 'ferromag_NaiveBayes.pkl'
  with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile:
    buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8')
    pklTFile.close()
  with io.BytesIO(buf) as pklFile:
    refCompos = pickle.load(pklFile)
  self._init(refCompos, copyBounds=1)
  self.details.useTrees = 0
  self.details.useNaiveBayes = 1
  self.details.mEstimateVal = 20.0
  self.details.qBounds = [0] + [2] * 6 + [0]
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test2(self):
  """ depth limit """
  self.details.tableName = 'ferro_quant'
  refComposName = 'ferromag_quant_10_3.pkl'
  with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
    buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
    pklTF.close()
  with io.BytesIO(buf) as pklF:
    refCompos = pickle.load(pklF)
  # first make sure the data are intact
  self._init(refCompos)
  self.details.limitDepth = 3
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test6(self):
  """ auto bounds with a real valued activity"""
  self.details.tableName = 'ferro_noquant_realact'
  refComposName = 'ferromag_auto_10_3.pkl'
  with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
    buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
    pklTF.close()
  with io.BytesIO(buf) as pklF:
    refCompos = pickle.load(pklF)
  # first make sure the data are intact
  self._init(refCompos, copyBounds=1)
  self.details.limitDepth = 3
  self.details.nModels = 10
  self.details.activityBounds = [0.5]
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  self.compare(compos, refCompos)
def test1_basics(self):
  # """ basics """
  self.details.tableName = 'ferro_quant'
  refComposName = 'ferromag_quant_10.pkl'
  with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
    buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
    pklTF.close()
  with io.BytesIO(buf) as pklF:
    refCompos = pickle.load(pklF)
  # first make sure the data are intact
  self._init(refCompos)
  compos = BuildComposite.RunIt(self.details, saveIt=0)
  # pickle.dump(compos, open(os.path.join(self.baseDir, refComposName), 'wb'))
  # with open(os.path.join(self.baseDir, refComposName), 'rb') as pklF:
  #   refCompos = pickle.load(pklF)
  self.compare(compos, refCompos)
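# The tests above that read their reference composite in text mode all repeat
# the same pattern: read the pickle as text, normalize '\r\n' line endings,
# re-encode to bytes, and unpickle from an in-memory buffer.  A minimal sketch
# of that pattern pulled into one place (the helper name _loadTextPickle is
# hypothetical, not part of the test suite; it relies on the os and pickle
# imports above, and pickle.loads over the re-encoded bytes is equivalent to
# pickle.load over an io.BytesIO buffer):
def _loadTextPickle(baseDir, fileName):
  with open(os.path.join(baseDir, fileName), 'r') as inF:
    buf = inF.read().replace('\r\n', '\n').encode('utf-8')
  return pickle.loads(buf)
# Each test body could then reduce its loading boilerplate to:
#   refCompos = _loadTextPickle(self.baseDir, refComposName)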
def GrowIt(details, composite, progressCallback=None, saveIt=1, setDescNames=0, data=None):
  """ does the actual work of building a composite model

    **Arguments**

      - details: a _CompositeRun.CompositeRun_ object containing details
        (options, parameters, etc.) about the run

      - composite: the composite model to grow

      - progressCallback: (optional) a function which is called with a single
        argument (the number of models built so far) after each model is built.

      - saveIt: (optional) if this is nonzero, the resulting model will be pickled
        and dumped to the filename specified in _details.outName_

      - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method
        will be called using the results of the data set's _GetVarNames()_ method;
        it is assumed that the details object has a _descNames attribute which
        is passed to the composite's _SetDescriptorNames()_ method.  Otherwise
        (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_.

      - data: (optional) the data set to be used.  If this is not provided, the
        data set described in details will be used.

    **Returns**

      the enlarged composite model

  """
  details.rundate = time.asctime()

  if data is None:
    fName = details.tableName.strip()
    if details.outName == '':
      details.outName = fName + '.pkl'
    if details.dbName == '':
      data = DataUtils.BuildQuantDataSet(fName)
    elif details.qBounds != []:
      details.tableName = fName
      data = details.GetDataSet()
    else:
      data = DataUtils.DBToQuantData(details.dbName, fName, quantName=details.qTableName,
                                     user=details.dbUser, password=details.dbPassword)

  nExamples = data.GetNPts()
  seed = composite._randomSeed
  DataUtils.InitRandomNumbers(seed)
  testExamples = []
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

  namedExamples = data.GetNamedData()
  trainExamples = namedExamples
  nExamples = len(trainExamples)
  message('Training with %d examples' % (nExamples))
  message('\t%d descriptors' % (len(trainExamples[0]) - 2))
  nVars = data.GetNVars()
  nPossibleVals = composite.nPossibleVals
  attrs = range(1, nVars + 1)

  if details.useTrees:
    from rdkit.ML.DecTree import CrossValidate, PruneTree
    if details.qBounds != []:
      from rdkit.ML.DecTree import BuildQuantTree
      builder = BuildQuantTree.QuantTreeBoot
    else:
      from rdkit.ML.DecTree import ID3
      builder = ID3.ID3Boot
    driver = CrossValidate.CrossValidationDriver
    pruner = PruneTree.PruneTree

    if setDescNames:
      composite.SetInputOrder(data.GetVarNames())
    composite.Grow(trainExamples, attrs, [0] + nPossibleVals, buildDriver=driver, pruner=pruner,
                   nTries=details.nModels, pruneIt=details.pruneIt,
                   lessGreedy=details.lessGreedy, needsQuantization=0, treeBuilder=builder,
                   nQuantBounds=details.qBounds, startAt=details.startAt,
                   maxDepth=details.limitDepth, progressCallback=progressCallback,
                   silent=not _verbose)
  else:
    from rdkit.ML.Neural import CrossValidate
    driver = CrossValidate.CrossValidationDriver
    composite.Grow(trainExamples, attrs, [0] + nPossibleVals, nTries=details.nModels,
                   buildDriver=driver, needsQuantization=0)

  composite.AverageErrors()
  composite.SortModels()
  modelList, counts, avgErrs = composite.GetAllData()
  counts = numpy.array(counts)
  avgErrs = numpy.array(avgErrs)
  composite._varNames = data.GetVarNames()

  for i in range(len(modelList)):
    modelList[i].NameModel(composite._varNames)

  # do final statistics
  weightedErrs = counts * avgErrs
  averageErr = sum(weightedErrs) / sum(counts)
  devs = (avgErrs - averageErr)
  devs = devs * counts
  devs = numpy.sqrt(devs * devs)
  avgDev = sum(devs) / sum(counts)
  if _verbose:
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
            (100. * averageErr, 100. * avgDev))

  if details.bayesModel:
    composite.Train(trainExamples, verbose=0)

  badExamples = []
  if not details.detailedRes:
    if _verbose:
      message('Testing all examples')
    wrong = BuildComposite.testall(composite, namedExamples, badExamples)
    if _verbose:
      message('%d examples (%% %5.2f) were misclassified' %
              (len(wrong), 100. * float(len(wrong)) / float(len(namedExamples))))
    _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

  if details.detailedRes:
    if _verbose:
      message('\nEntire data set:')
    resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()), data, composite,
                                             nPossibleVals[-1], details.threshold)
    nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
    nPts = len(namedExamples)
    nClass = nGood + nBad
    _runDetails.overall_error = float(nBad) / nClass
    _runDetails.overall_correct_conf = avgGood
    _runDetails.overall_incorrect_conf = avgBad
    _runDetails.overall_result_matrix = repr(voteTab)
    nRej = nClass - nPts
    if nRej > 0:
      _runDetails.overall_fraction_dropped = float(nRej) / nPts
  return composite
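# A rough usage sketch for GrowIt (hypothetical, not part of the module):
# enlarge a previously built composite by a few more models.  It assumes the
# call is made from the module where GrowIt is defined, so that message(),
# _verbose and _runDetails are available; the pickle path, database name and
# table name below are placeholders, and BuildComposite is the same module
# used inside GrowIt above.
def _exampleGrowUsage():
  import pickle

  details = BuildComposite.SetDefaults()
  details.dbName = 'ferromag.sqlt'   # placeholder database name
  details.tableName = 'ferro_quant'  # placeholder table name
  details.nModels = 10               # number of additional models to grow

  # placeholder path to a pickled composite produced by BuildComposite.RunIt
  with open('existing_composite.pkl', 'rb') as inF:
    compos = pickle.load(inF)

  compos = GrowIt(details, compos, saveIt=0)
  modelList, counts, avgErrs = compos.GetAllData()
  print('%d models after growing' % len(modelList))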