def ClusterFromDetails(details):
    """ Returns the cluster tree

    **Arguments**

      - details: a run-details object. It is passed to
        _MolSimilarity.GetFingerprints()_ and its _maxMols_, _outFileName_,
        _metric_ and _clusterAlgo_ attributes are read here.

    **Returns**

      the tree produced by _ClusterPoints()_, or None if the output file
      could not be opened or there was no fingerprint data to cluster.

    **Notes**

      - if _details.outFileName_ is set the tree is also pickled to that
        file. The file is opened (and therefore truncated) before the data
        is checked, exactly as before.

    """
    data = MolSimilarity.GetFingerprints(details)
    if details.maxMols > 0:
        data = data[:details.maxMols]

    outF = None
    if details.outFileName:
        try:
            outF = open(details.outFileName, 'wb+')
        except IOError:
            error("Error: could not open output file %s for writing\n" % (details.outFileName))
            return None

    try:
        if not data:
            return None
        clustTree = ClusterPoints(data, details.metric, details.clusterAlgo, haveLabels=0,
                                  haveActs=1)
        if outF is not None:
            cPickle.dump(clustTree, outF)
        return clustTree
    finally:
        # previously the handle was never closed, not even on the early return
        if outF is not None:
            outF.close()
def Pickle(self, fileName='foo.pkl'):
    """ Writes this forest off to a file so that it can be easily loaded later

    **Arguments**

      fileName is the name of the file to be written

    """
    # the context manager closes the file even when pickling raises
    with open(fileName, 'wb+') as pFile:
        cPickle.dump(self, pFile, 1)
def Pickle(self, fileName='foo.pkl'):
    """ Writes this forest off to a file so that it can be easily loaded later

    **Arguments**

      fileName is the name of the file to be written

    """
    # the context manager closes the file even when pickling raises
    with open(fileName, 'wb+') as pFile:
        cPickle.dump(self, pFile, 1)
def _writeDetailFile(self, inF, outF):
    """ Regenerates the per-atom contributions for a stream of pickled records

    **Arguments**

      - inF: a file-like object holding consecutive pickled 2-tuples whose
        first element is a SMILES string

      - outF: a file-like object that receives one pickled
        (SMILES, contributions) tuple per molecule that could be parsed

    **Notes**

      - reading stops at the first EOFError raised by the unpickler

      - SMILES that cannot be converted to a molecule are reported with
        print() and skipped

    """
    while True:
        try:
            record = cPickle.load(inF)
        except EOFError:
            # ran off the end of the input stream: nothing left to convert
            return
        smi, refContribs = record
        mol = Chem.MolFromSmiles(smi)
        if not mol:
            print('Problems with SMILES:', smi)
            continue
        mol = Chem.AddHs(mol, 1)
        smi2 = Chem.MolToSmiles(mol)
        contribs = Crippen._GetAtomContribs(mol)
        cPickle.dump((smi, contribs), outF)
def testPkl(self):
    """ Test pickling: the on-bits of a vector must survive a round trip
    through a pickle file

    """
    v1 = self.klass(10)
    v1[1] = 1
    v1[2] = 1
    v1[3] = 1
    pklName = 'foo.pkl'
    try:
        with open(pklName, 'wb+') as outF:
            cPickle.dump(v1, outF)
        with open(pklName, 'rb') as inF:
            v2 = cPickle.load(inF)
    finally:
        # do not leave the scratch file behind when the round trip fails
        if os.path.exists(pklName):
            os.unlink(pklName)
    assert tuple(v1.GetOnBits()) == tuple(v2.GetOnBits()), 'pkl failed'
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

    **Notes**

      - if the file cannot be opened an error is logged and nothing is
        written; no exception is raised in that case.

    """
    try:
        f = open(fileName, 'wb+')
    except Exception:
        logger.error('cannot open output file %s for writing' % (fileName))
        return
    # close the handle even if pickling fails
    with f:
        cPickle.dump(self, f)
def _writeDetailFile(self, inF, outF):
    """ Regenerates the per-atom contributions for a stream of pickled records

    **Arguments**

      - inF: a file-like object holding consecutive pickled 2-tuples whose
        first element is a SMILES string

      - outF: a file-like object that receives one pickled
        (SMILES, contributions) tuple per molecule that could be parsed

    **Notes**

      - reading stops at the first EOFError raised by the unpickler

      - SMILES that cannot be converted to a molecule are reported with
        print() and skipped

    """
    while True:
        try:
            record = cPickle.load(inF)
        except EOFError:
            # ran off the end of the input stream: nothing left to convert
            return
        smi, refContribs = record
        mol = Chem.MolFromSmiles(smi)
        if not mol:
            print('Problems with SMILES:', smi)
            continue
        mol = Chem.AddHs(mol, 1)
        smi2 = Chem.MolToSmiles(mol)
        contribs = Crippen._GetAtomContribs(mol)
        cPickle.dump((smi, contribs), outF)
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

    **Notes**

      - if the file cannot be opened a message is printed and nothing is
        written; no exception is raised in that case.

    """
    try:
        f = open(fileName, 'wb+')
    except Exception:
        print('cannot open output file %s for writing' % (fileName))
        return
    # close the handle even if pickling fails
    with f:
        cPickle.dump(self, f)
def testPkl(self):
    """ test pickling: the on-bits of a vector must survive a round trip
    through a pickle file

    """
    v1 = klass(10)
    v1[1] = 1
    v1[2] = 1
    v1[3] = 1
    pklName = "foo.pkl"
    try:
        with open(pklName, "wb+") as outF:
            cPickle.dump(v1, outF)
        with open(pklName, "rb") as inF:
            v2 = cPickle.load(inF)
    finally:
        # do not leave the scratch file behind when the round trip fails
        if os.path.exists(pklName):
            os.unlink(pklName)
    assert tuple(v1.GetOnBits()) == tuple(v2.GetOnBits()), "pkl failed"
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

    **Notes**

      - if the file cannot be opened an error is logged and nothing is
        written; no exception is raised in that case.

    """
    from rdkit.six.moves import cPickle
    try:
        f = open(fileName, 'wb+')
    except Exception:
        # a bare except here would also have swallowed KeyboardInterrupt
        logger.error('cannot open output file %s for writing' % (fileName))
        return
    # close the handle even if pickling fails
    with f:
        cPickle.dump(self, f)
def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

    **Notes**

      - if the file cannot be opened a message is printed and nothing is
        written; no exception is raised in that case.

    """
    from rdkit.six.moves import cPickle
    try:
        f = open(fileName, 'wb+')
    except Exception:
        # a bare except here would also have swallowed KeyboardInterrupt
        print('cannot open output file %s for writing' % (fileName))
        return
    # close the handle even if pickling fails
    with f:
        cPickle.dump(self, f)
def Pickle(self, fileName='foo.pkl', saveExamples=0):
    """ Writes this composite off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

      - saveExamples: if this is zero, the individual models will have
        their stored examples cleared.

    """
    if not saveExamples:
        self.ClearModelExamples()

    # the context manager closes the file even when pickling raises
    with open(fileName, 'wb+') as pFile:
        cPickle.dump(self, pFile, 1)
def Pickle(self, fileName='foo.pkl', saveExamples=0):
    """ Writes this composite off to a file so that it can be easily loaded later

    **Arguments**

      - fileName: the name of the file to be written

      - saveExamples: if this is zero, the individual models will have
        their stored examples cleared.

    """
    if not saveExamples:
        self.ClearModelExamples()

    # the context manager closes the file even when pickling raises
    with open(fileName, 'wb+') as pFile:
        cPickle.dump(self, pFile, 1)
def runIt(inFileName, outFileName, smiCol=0, maxMols=-1, delim=','):
    """ Builds a pickle file of atom contributions from a gzipped text file

    **Arguments**

      - inFileName: name of the gzip-compressed, delimited input file

      - outFileName: name of the output file; one pickled
        (SMILES, contributions) tuple is appended per parsable molecule

      - smiCol: index of the column holding the SMILES

      - maxMols: if positive, stop after this many non-comment rows have
        been processed

      - delim: the column delimiter

    **Notes**

      - rows starting with '#' are skipped

      - each SMILES is echoed with print() before it is parsed

    """
    nDone = 0
    with gzip.open(inFileName, 'r') as inF, open(outFileName, 'wb+') as outF:
        # iterate lazily instead of slurping the whole file with readlines()
        for line in inF:
            # gzip hands back bytes under Python 3; comparing/splitting those
            # against text delimiters would raise a TypeError
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if line.startswith('#'):
                continue
            splitL = line.strip().split(delim)
            smi = splitL[smiCol].strip()
            print(smi)
            mol = Chem.MolFromSmiles(smi)
            if mol:
                contribs = Crippen._GetAtomContribs(mol)
                cPickle.dump((smi, contribs), outF)
            nDone += 1
            if maxMols > 0 and nDone >= maxMols:
                break
def testPkl(self):
    " testing molecule pickle "
    import tempfile
    fd, self.fName = tempfile.mkstemp('.pkl')
    # mkstemp returns an open OS-level descriptor; dropping the reference
    # does not close it, so release it explicitly
    os.close(fd)
    self.m = Chem.MolFromSmiles('CC(=O)CC')
    try:
        with open(self.fName, 'wb+') as outF:
            cPickle.dump(self.m, outF)
        with open(self.fName, 'rb') as inF:
            m2 = cPickle.load(inF)
    finally:
        # best-effort cleanup of the scratch file
        try:
            os.unlink(self.fName)
        except OSError:
            pass
    oldSmi = Chem.MolToSmiles(self.m)
    newSmi = Chem.MolToSmiles(m2)
    assert oldSmi == newSmi, 'string compare failed'
def testPkl(self):
    " testing molecule pickle "
    import tempfile
    fd, self.fName = tempfile.mkstemp('.pkl')
    # mkstemp returns an open OS-level descriptor; dropping the reference
    # does not close it, so release it explicitly
    os.close(fd)
    self.m = Chem.MolFromSmiles('CC(=O)CC')
    try:
        with open(self.fName, 'wb+') as outF:
            cPickle.dump(self.m, outF)
        with open(self.fName, 'rb') as inF:
            m2 = cPickle.load(inF)
    finally:
        # best-effort cleanup of the scratch file
        try:
            os.unlink(self.fName)
        except OSError:
            pass
    oldSmi = Chem.MolToSmiles(self.m)
    newSmi = Chem.MolToSmiles(m2)
    assert oldSmi == newSmi, 'string compare failed'
def testPkl(self):
    " testing molecule pickle "
    import tempfile
    fd, self.fName = tempfile.mkstemp(".pkl")
    # mkstemp returns an open OS-level descriptor; dropping the reference
    # does not close it, so release it explicitly
    os.close(fd)
    self.m = Chem.MolFromSmiles("CC(=O)CC")
    try:
        with open(self.fName, "wb+") as outF:
            cPickle.dump(self.m, outF)
        with open(self.fName, "rb") as inF:
            m2 = cPickle.load(inF)
    finally:
        # best-effort cleanup of the scratch file
        try:
            os.unlink(self.fName)
        except OSError:
            pass
    oldSmi = Chem.MolToSmiles(self.m)
    newSmi = Chem.MolToSmiles(m2)
    assert oldSmi == newSmi, "string compare failed"
def WritePickledData(outName, data):
    """ Stores the contents of a data set as four consecutive pickles

    **Arguments**

      - outName: the name of the file to be written (a .qdat.pkl or
        .dat.pkl file)

      - data: either an _MLData.MLDataSet_ or an _MLData.MLQuantDataSet_

    **Notes**

      - the pickles are written in this order: variable names,
        quantization bounds, point names, examples

    """
    # pull everything out of the data set before touching the file system
    payload = (
        data.GetVarNames(),
        data.GetQuantBounds(),
        data.GetPtNames(),
        data.GetAllData(),
    )
    with open(outName, 'wb+') as sink:
        for item in payload:
            cPickle.dump(item, sink)
def WritePickledData(outName, data):
    """ Stores the contents of a data set as four consecutive pickles

    **Arguments**

      - outName: the name of the file to be written (a .qdat.pkl or
        .dat.pkl file)

      - data: either an _MLData.MLDataSet_ or an _MLData.MLQuantDataSet_

    **Notes**

      - the pickles are written in this order: variable names,
        quantization bounds, point names, examples

    """
    # pull everything out of the data set before touching the file system
    payload = (
        data.GetVarNames(),
        data.GetQuantBounds(),
        data.GetPtNames(),
        data.GetAllData(),
    )
    with open(outName, 'wb+') as sink:
        for item in payload:
            cPickle.dump(item, sink)
def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0):
    """ Builds, evaluates and (optionally) saves a composite model

    **Arguments**

      - details: the run-details object that controls the run; a large
        number of its attributes are read here and _internalHoldoutFrac_
        and _model_ are written

      - data: the data set to train on; it must provide _GetNamedData()_,
        _GetNVars()_, _GetNPossibleVals()_, _GetVarNames()_ and _GetNPts()_

      - progressCallback: passed through to _composite.Grow()_ for the
        tree-based model types

      - saveIt: if nonzero the composite is pickled to _details.outName_

      - setDescNames: if nonzero the descriptor names are taken from
        _details._descNames_ and the data set's variable names are used as
        the input order

    **Returns**

      the composite model that was built

    **Notes**

      - the data are optionally split into training/hold-out sets
        (_details.splitRun_) and filtered (_details.filterFrac_); examples
        removed by filtering are added to the hold-out set

      - summary statistics are stored on the module-level _runDetails
        object, not on _details_

    """
    if details.lockRandom:
        seed = details.randomSeed
    else:
        import random
        # randint requires integer bounds (1e6 is a float)
        seed = (random.randint(0, 1000000), random.randint(0, 1000000))
    DataUtils.InitRandomNumbers(seed)
    testExamples = []
    if details.shuffleActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
    elif details.randomActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

    namedExamples = data.GetNamedData()
    if details.splitRun == 1:
        trainIdx, testIdx = SplitData.SplitIndices(len(namedExamples), details.splitFrac,
                                                   silent=not _verbose)

        trainExamples = [namedExamples[x] for x in trainIdx]
        testExamples = [namedExamples[x] for x in testIdx]
    else:
        testExamples = []
        testIdx = []
        trainIdx = list(range(len(namedExamples)))
        trainExamples = namedExamples

    if details.filterFrac != 0.0:
        # if we're doing quantization on the fly, we need to handle that here:
        if hasattr(details, 'activityBounds') and details.activityBounds:
            tExamples = []
            bounds = details.activityBounds
            for pt in trainExamples:
                pt = pt[:]
                act = pt[-1]
                placed = 0
                bound = 0
                # find the first bound the activity falls below
                while not placed and bound < len(bounds):
                    if act < bounds[bound]:
                        pt[-1] = bound
                        placed = 1
                    else:
                        bound += 1
                if not placed:
                    pt[-1] = bound
                tExamples.append(pt)
        else:
            bounds = None
            tExamples = trainExamples
        trainIdx, temp = DataUtils.FilterData(tExamples, details.filterVal, details.filterFrac, -1,
                                              indicesOnly=1)
        tmp = [trainExamples[x] for x in trainIdx]
        testExamples += [trainExamples[x] for x in temp]
        trainExamples = tmp

        counts = DataUtils.CountResults(trainExamples, bounds=bounds)
        # dict.keys() has no sort() method under Python 3
        ks = sorted(counts)
        message('Result Counts in training set:')
        for k in ks:
            message(str((k, counts[k])))
        counts = DataUtils.CountResults(testExamples, bounds=bounds)
        ks = sorted(counts)
        message('Result Counts in test set:')
        for k in ks:
            message(str((k, counts[k])))
    nExamples = len(trainExamples)
    message('Training with %d examples' % (nExamples))

    nVars = data.GetNVars()
    # needs to be a real list: entries are removed below
    attrs = list(range(1, nVars + 1))
    nPossibleVals = data.GetNPossibleVals()
    for i in range(1, len(nPossibleVals)):
        if nPossibleVals[i - 1] == -1:
            attrs.remove(i)

    if details.pickleDataFileName != '':
        with open(details.pickleDataFileName, 'wb+') as pickleDataFile:
            cPickle.dump(trainExamples, pickleDataFile)
            cPickle.dump(testExamples, pickleDataFile)

    if details.bayesModel:
        composite = BayesComposite.BayesComposite()
    else:
        composite = Composite.Composite()

    composite._randomSeed = seed
    composite._splitFrac = details.splitFrac
    composite._shuffleActivities = details.shuffleActivities
    composite._randomizeActivities = details.randomActivities

    if hasattr(details, 'filterFrac'):
        composite._filterFrac = details.filterFrac
    if hasattr(details, 'filterVal'):
        composite._filterVal = details.filterVal

    composite.SetModelFilterData(details.modelFilterFrac, details.modelFilterVal)

    composite.SetActivityQuantBounds(details.activityBounds)
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
        nPossibleVals[-1] = len(details.activityBounds) + 1

    if setDescNames:
        composite.SetInputOrder(data.GetVarNames())
        composite.SetDescriptorNames(details._descNames)
    else:
        composite.SetDescriptorNames(data.GetVarNames())
    composite.SetActivityQuantBounds(details.activityBounds)
    if details.nModels == 1:
        details.internalHoldoutFrac = 0.0
    if details.useTrees:
        from rdkit.ML.DecTree import CrossValidate, PruneTree
        if details.qBounds != []:
            from rdkit.ML.DecTree import BuildQuantTree
            builder = BuildQuantTree.QuantTreeBoot
        else:
            from rdkit.ML.DecTree import ID3
            builder = ID3.ID3Boot
        driver = CrossValidate.CrossValidationDriver
        pruner = PruneTree.PruneTree

        composite.SetQuantBounds(details.qBounds)
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                       pruner=pruner, nTries=details.nModels, pruneIt=details.pruneIt,
                       lessGreedy=details.lessGreedy, needsQuantization=0, treeBuilder=builder,
                       nQuantBounds=details.qBounds, startAt=details.startAt,
                       maxDepth=details.limitDepth, progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors, silent=not _verbose)

    elif details.useSigTrees:
        from rdkit.ML.DecTree import CrossValidate
        from rdkit.ML.DecTree import BuildSigTree
        builder = BuildSigTree.SigTreeBuilder
        driver = CrossValidate.CrossValidationDriver
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        # these settings are optional on the details object
        biasList = getattr(details, 'sigTreeBiasList', None)
        useCMIM = getattr(details, 'useCMIM', 0)
        allowCollections = getattr(details, 'allowCollections', False)
        composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                       nTries=details.nModels, needsQuantization=0, treeBuilder=builder,
                       maxDepth=details.limitDepth, progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors, biasList=biasList,
                       useCMIM=useCMIM, allowCollection=allowCollections, silent=not _verbose)

    elif details.useKNN:
        from rdkit.ML.KNN import CrossValidate
        from rdkit.ML.KNN import DistFunctions

        driver = CrossValidate.CrossValidationDriver
        dfunc = ''
        if (details.knnDistFunc == "Euclidean"):
            dfunc = DistFunctions.EuclideanDist
        elif (details.knnDistFunc == "Tanimoto"):
            dfunc = DistFunctions.TanimotoDist
        else:
            assert 0, "Bad KNN distance metric value"

        composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                       nTries=details.nModels, needsQuantization=0, numNeigh=details.knnNeighs,
                       holdOutFrac=details.internalHoldoutFrac, distFunc=dfunc)

    elif details.useNaiveBayes or details.useSigBayes:
        from rdkit.ML.NaiveBayes import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        if not getattr(details, 'useSigBayes', False):
            composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver, nTries=details.nModels, needsQuantization=0,
                           nQuantBounds=details.qBounds, holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           mEstimateVal=details.mEstimateVal, silent=not _verbose)
        else:
            useCMIM = getattr(details, 'useCMIM', 0)
            composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver, nTries=details.nModels, needsQuantization=0,
                           nQuantBounds=details.qBounds, mEstimateVal=details.mEstimateVal,
                           useSigs=True, useCMIM=useCMIM,
                           holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           silent=not _verbose)

    ## elif details.useSVM:
    ##   from rdkit.ML.SVM import CrossValidate
    ##   driver = CrossValidate.CrossValidationDriver
    ##   composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals,
    ##                  buildDriver=driver, nTries=details.nModels,
    ##                  needsQuantization=0,
    ##                  cost=details.svmCost,gamma=details.svmGamma,
    ##                  weights=details.svmWeights,degree=details.svmDegree,
    ##                  type=details.svmType,kernelType=details.svmKernel,
    ##                  coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu,
    ##                  cache_size=details.svmCache,shrinking=details.svmShrink,
    ##                  dataType=details.svmDataType,
    ##                  holdOutFrac=details.internalHoldoutFrac,
    ##                  replacementSelection=details.replacementSelection,
    ##                  silent=not _verbose)

    else:
        from rdkit.ML.Neural import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        composite.Grow(trainExamples, attrs, [0] + nPossibleVals, nTries=details.nModels,
                       buildDriver=driver, needsQuantization=0)

    composite.AverageErrors()
    composite.SortModels()
    modelList, counts, avgErrs = composite.GetAllData()
    counts = numpy.array(counts)
    avgErrs = numpy.array(avgErrs)
    composite._varNames = data.GetVarNames()

    for model in modelList:
        model.NameModel(composite._varNames)

    # do final statistics
    weightedErrs = counts * avgErrs
    averageErr = sum(weightedErrs) / sum(counts)
    devs = (avgErrs - averageErr)
    devs = devs * counts
    devs = numpy.sqrt(devs * devs)
    avgDev = sum(devs) / sum(counts)
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
            (100. * averageErr, 100. * avgDev))

    if details.bayesModel:
        composite.Train(trainExamples, verbose=0)

    # blow out the saved examples and then save the composite:
    composite.ClearModelExamples()
    if saveIt:
        composite.Pickle(details.outName)
    details.model = DbModule.binaryHolder(cPickle.dumps(composite))

    badExamples = []
    if not details.detailedRes and not getattr(details, 'noScreen', False):
        if details.splitRun:
            message('Testing all hold-out examples')
            wrong = testall(composite, testExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong), 100. * float(len(wrong)) / float(len(testExamples))))
            _runDetails.holdout_error = float(len(wrong)) / len(testExamples)
        else:
            message('Testing all examples')
            wrong = testall(composite, namedExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong), 100. * float(len(wrong)) / float(len(namedExamples))))
            _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

    if details.detailedRes:
        message('\nEntire data set:')
        resTup = ScreenComposite.ShowVoteResults(
            range(data.GetNPts()), data, composite, nPossibleVals[-1], details.threshold)
        nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
        nPts = len(namedExamples)
        nClass = nGood + nBad
        _runDetails.overall_error = float(nBad) / nClass
        _runDetails.overall_correct_conf = avgGood
        _runDetails.overall_incorrect_conf = avgBad
        _runDetails.overall_result_matrix = repr(voteTab)
        # NOTE(review): nClass - nPts looks inverted (it can only be positive
        # if more points were classified than exist) - confirm the intent
        nRej = nClass - nPts
        if nRej > 0:
            _runDetails.overall_fraction_dropped = float(nRej) / nPts

        if details.splitRun:
            message('\nHold-out data:')
            resTup = ScreenComposite.ShowVoteResults(
                range(len(testExamples)), testExamples, composite, nPossibleVals[-1],
                details.threshold)
            nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
            nPts = len(testExamples)
            nClass = nGood + nBad
            _runDetails.holdout_error = float(nBad) / nClass
            _runDetails.holdout_correct_conf = avgGood
            _runDetails.holdout_incorrect_conf = avgBad
            _runDetails.holdout_result_matrix = repr(voteTab)
            # NOTE(review): same sign question as above
            nRej = nClass - nPts
            if nRej > 0:
                _runDetails.holdout_fraction_dropped = float(nRej) / nPts

    if details.persistTblName and details.dbName:
        message('Updating results table %s:%s' % (details.dbName, details.persistTblName))
        details.Store(db=details.dbName, table=details.persistTblName)

    if details.badName != '':
        with open(details.badName, 'w+') as badFile:
            # _wrong_ is only bound when the screening above ran, which is
            # also the only way _badExamples_ can be non-empty
            for i, ex in enumerate(badExamples):
                vote = wrong[i]
                outStr = '%s\t%s\n' % (ex, vote)
                badFile.write(outStr)

    composite.ClearModelExamples()
    return composite
from rdkit.Chem.PyMol import MolViewer
from rdkit.Chem.Subshape import SubshapeBuilder, SubshapeObjects, SubshapeAligner
from rdkit.six.moves import cPickle
import copy

# Demo script: build subshapes for two test molecules, combine them and
# display the results in PyMol. Every pickle file is opened in a context
# manager so that the handles are closed again.
m1 = Chem.MolFromMolFile('test_data/square1.mol')
m2 = Chem.MolFromMolFile('test_data/square2.mol')

b = SubshapeBuilder.SubshapeBuilder()
b.gridDims = (10., 10., 5)
b.gridSpacing = 0.4
b.winRad = 2.0
# flip this switch to 0 to reload previously pickled shapes instead of
# regenerating them
if 1:
    print('m1:')
    s1 = b.GenerateSubshapeShape(m1)
    with open('test_data/square1.shp.pkl', 'wb+') as pklF:
        cPickle.dump(s1, pklF)
    print('m2:')
    s2 = b.GenerateSubshapeShape(m2)
    with open('test_data/square2.shp.pkl', 'wb+') as pklF:
        cPickle.dump(s2, pklF)
    ns1 = b.CombineSubshapes(s1, s2)
    b.GenerateSubshapeSkeleton(ns1)
    with open('test_data/combined.shp.pkl', 'wb+') as pklF:
        cPickle.dump(ns1, pklF)
else:
    with open('test_data/square1.shp.pkl', 'rb') as pklF:
        s1 = cPickle.load(pklF)
    with open('test_data/square2.shp.pkl', 'rb') as pklF:
        s2 = cPickle.load(pklF)
    with open('test_data/combined.shp.pkl', 'rb') as pklF:
        ns1 = cPickle.load(pklF)

v = MolViewer()
SubshapeObjects.DisplaySubshape(v, s1, 'shape1')
SubshapeObjects.DisplaySubshape(v, ns1, 'ns1')
def Pickle(self, fileName='foo.pkl'):
    """ Serializes this tree with cPickle and stores the result on disk

    **Arguments**

      - fileName: the name of the file to be written

    """
    with open(fileName, 'wb+') as sink:
        cPickle.dump(self, sink)
def Pickle(self, fileName="foo.pkl"):
    """ Serializes this tree with cPickle and stores the result on disk

    **Arguments**

      - fileName: the name of the file to be written

    """
    with open(fileName, "wb+") as sink:
        cPickle.dump(self, sink)
def FingerprintsFromDetails(details, reportFreq=10):
    """ Generates fingerprints for the molecules described by _details_

    **Arguments**

      - details: the run-details object. The input is taken, in order of
        preference, from a database table (_dbName_ and _tableName_), a
        text file of SMILES (_inFileName_ with _useSmiles_) or an SD file
        (_inFileName_ with _useSD_).

      - reportFreq: when reading an SD file a progress message is emitted
        every _reportFreq_ molecules (disabled if not positive)

    **Returns**

      the fingerprints produced by _FingerprintsFromSmiles()_,
      _FingerprintsFromPickles()_ or _FingerprintsFromMols()_, or None if
      no input could be read.

    **Notes**

      - _details.idName_ is filled in with a default if it is empty

      - if _details.outFileName_ is set each fingerprint entry is pickled
        to that file; if _details.outTableName_ and a database name are
        set the fingerprints are also inserted into that table.

    """
    data = None
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            import traceback
            error('Problems establishing connection to database: %s|%s\n' % (details.dbName,
                                                                             details.tableName))
            traceback.print_exc()
        if not details.idName:
            details.idName = DbInfo.GetColumnNames(details.dbName, details.tableName)[0]
        dataSet = DataUtils.DBToData(details.dbName, details.tableName,
                                     what='%s,%s' % (details.idName, details.smilesName))
        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        conn = None
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(details.inFileName,
                                               onlyCols=[details.idName, details.smilesName])
        except IOError:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
            # without this the name would be unbound further down
            dataSet = None

        idCol = 0
        smiCol = 1
    elif details.inFileName and details.useSD:
        conn = None
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while 1:
                try:
                    # next() works on both Python 2 and 3; .next() is 2-only
                    m = next(s)
                except StopIteration:
                    break
                if m:
                    dataSet.append(m)
                    if reportFreq > 0 and not len(dataSet) % reportFreq:
                        message('Read %d molecules\n' % (len(dataSet)))
                    # the cap is checked after every molecule that was read
                    if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                        break

            for i, mol in enumerate(dataSet):
                if mol.HasProp(details.idName):
                    nm = mol.GetProp(details.idName)
                else:
                    nm = mol.GetProp('_Name')
                dataSet[i] = (nm, mol)
    else:
        dataSet = None

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        # the details attributes are forwarded as keyword arguments
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol, **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol, **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            with open(details.outFileName, 'wb+') as outF:
                for fp in fps:
                    cPickle.dump(fp, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings([details.idName, details.smilesName], colTypes,
                                              keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName, DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            if details.replaceTable or \
               details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
for example in examples: res = net.ClassifyExample(example[:-1]) print("%f -> %f" % (example[-1], res)) return net def runProfile(command): import random random.seed(23) import profile, pstats datFile = "%s.prof.dat" % (command) profile.run("%s()" % command, datFile) stats = pstats.Stats(datFile) stats.strip_dirs() stats.sort_stats("time").print_stats() if 0: net = testXor() print("Xor:", net) from rdkit.six.moves import cPickle outF = open("xornet.pkl", "wb+") cPickle.dump(net, outF) outF.close() else: # runProfile('testLinear') net = testLinear() # net = testOr()
cat = None obls = None if details.doBuild: if not suppl: message("We require inData to generate a catalog\n") sys.exit(-2) message("Building catalog\n") t1 = time.time() cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, maxPath=details.maxPath) t2 = time.time() message("\tThat took %.2f seconds.\n" % (t2 - t1)) if details.catalogName: message("Dumping catalog data\n") cPickle.dump(cat, open(details.catalogName, 'wb+')) elif details.catalogName: message("Loading catalog\n") cat = cPickle.load(open(details.catalogName, 'rb')) if details.onBitsName: try: obls = cPickle.load(open(details.onBitsName, 'rb')) except Exception: obls = None else: if len(obls) < (inD.count('\n') - 1): obls = None scores = None if details.doScore: if not suppl: message("We require inData to score molecules\n")
obls = None if details.doBuild: if not suppl: message("We require inData to generate a catalog\n") sys.exit(-2) message("Building catalog\n") t1 = time.time() cat = BuildCatalog(suppl, maxPts=details.numMols, minPath=details.minPath, maxPath=details.maxPath) t2 = time.time() message("\tThat took %.2f seconds.\n" % (t2 - t1)) if details.catalogName: message("Dumping catalog data\n") cPickle.dump(cat, open(details.catalogName, 'wb+')) elif details.catalogName: message("Loading catalog\n") cat = cPickle.load(open(details.catalogName, 'rb')) if details.onBitsName: try: obls = cPickle.load(open(details.onBitsName, 'rb')) except Exception: obls = None else: if len(obls) < (inD.count('\n') - 1): obls = None scores = None if details.doScore: if not suppl: message("We require inData to score molecules\n")
cmpd = Chem.AddHs(cmpd) AllChem.EmbedMolecule(cmpd) AllChem.UFFOptimizeMolecule(cmpd) AllChem.CanonicalizeMol(cmpd) # print(Chem.MolToMolBlock(cmpd), file=file('testmol.mol', 'w+')) else: cmpd = Chem.MolFromMolFile('testmol.mol') builder = SubshapeBuilder() if 1: shape = builder.GenerateSubshapeShape(cmpd) v = MolViewer() if 1: tmpFile = tempfile.mktemp('.grd') v.server.deleteAll() Geometry.WriteGridToFile(shape.grid, tmpFile) time.sleep(1) v.ShowMol(cmpd, name='testMol', showOnly=True) v.server.loadSurface(tmpFile, 'testGrid', '', 2.5) v.server.resetCGO('*') with open('subshape.pkl', 'w+') as f: cPickle.dump(shape, f) for i, pt in enumerate(shape.skelPts): v.server.sphere(tuple(pt.location), .5, (1, 0, 1), 'Pt-%d' % i) if not hasattr(pt, 'shapeDirs'): continue momBeg = pt.location - pt.shapeDirs[0] momEnd = pt.location + pt.shapeDirs[0] v.server.cylinder(tuple(momBeg), tuple(momEnd), .1, (1, 0, 1), 'v-%d' % i)
c1.drawPolygon([(100, 100), (100, 200), (200, 200), (200, 100)], fillColor=pid.Color(0, 0, 1))
c1.drawLines([(100, 100, 200, 200), (100, 200, 200, 100)], color=pid.Color(0, 1, 0), width=2)

# because the log has been instantiated with clear() as the loggerFlushCommand,
# this will blow out the log as well as the contents of the canvas.
c1.clear()

# draw some more stuff
c1.drawPolygon([(100, 100), (100, 200), (200, 200), (200, 100)], fillColor=pid.Color(1, 0, 0))
c1.drawLines([(100, 100, 200, 200), (100, 200, 200, 100)], color=pid.Color(0, 0, 0), width=2)
# and write the resulting file.
c1.save()

# save the log by pickling it.
from rdkit.six.moves import cPickle
# use a context manager so the handle is flushed and closed before the
# file is read back below
with open('foo.pkl', 'wb+') as pklF:
    cPickle.dump(c1._LoggerGetLog(), pklF)

# create a new canvas
c2 = pidPIL.PILCanvas(sz, 'foo.png')
# read the pickled log back in
with open('foo.pkl', 'rb') as pklF:
    t = cPickle.load(pklF)
# and play the log on the new canvas
Logger.replay(t, c2)
# there should now be a file 'foo.png' with the image
def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0):
    """ Builds a composite model from a data set, reports its error
    statistics and returns it

    **Arguments**

      - details: the run-details object. Its attributes select the model
        type (_useTrees_, _useSigTrees_, _useKNN_, _useNaiveBayes_,
        _useSigBayes_, otherwise a neural net is built) and supply the build
        parameters. It is modified in place: _internalHoldoutFrac_ is reset
        to 0.0 when _nModels_ is 1 and _model_ is set to the pickled
        composite.

      - data: the data set. It must provide _GetNamedData()_, _GetNVars()_,
        _GetNPossibleVals()_ and _GetVarNames()_ (and _GetNPts()_ when
        _details.detailedRes_ is set). Its activities are randomized in
        place if _details.shuffleActivities_ or _details.randomActivities_
        is 1.

      - progressCallback: (optional) forwarded to _composite.Grow()_ for the
        tree-based model types

      - saveIt: (optional) if nonzero, the composite is pickled to
        _details.outName_

      - setDescNames: (optional) if nonzero, the input order is taken from
        the data set and the descriptor names from _details._descNames_;
        otherwise the descriptor names are the data set's variable names

    **Returns**

      the composite model

    **Notes**

      - screening results are stored on the module-level _\\_runDetails_
        object, not on the _details_ argument

      - optional outputs: the train/test split is pickled to
        _details.pickleDataFileName_ and misclassified examples are written
        to _details.badName_ when those names are not empty

    """
    if details.lockRandom:
        seed = details.randomSeed
    else:
        import random
        # randint() rejects non-integer bounds on Python >= 3.12, so the upper
        # limit has to be a real int rather than the float 1e6
        maxSeed = 10**6
        seed = (random.randint(0, maxSeed), random.randint(0, maxSeed))
    DataUtils.InitRandomNumbers(seed)

    if details.shuffleActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
    elif details.randomActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

    namedExamples = data.GetNamedData()
    if details.splitRun == 1:
        trainIdx, testIdx = SplitData.SplitIndices(
            len(namedExamples), details.splitFrac, silent=not _verbose)
        trainExamples = [namedExamples[x] for x in trainIdx]
        testExamples = [namedExamples[x] for x in testIdx]
    else:
        testExamples = []
        trainExamples = namedExamples

    if details.filterFrac != 0.0:
        # if we're doing quantization on the fly, we need to handle that here:
        if hasattr(details, 'activityBounds') and details.activityBounds:
            bounds = details.activityBounds
            tExamples = []
            for pt in trainExamples:
                pt = pt[:]
                act = pt[-1]
                # the quantized activity is the index of the first bound the
                # raw activity falls below, or len(bounds) if there is none
                bound = 0
                while bound < len(bounds) and not act < bounds[bound]:
                    bound += 1
                pt[-1] = bound
                tExamples.append(pt)
        else:
            bounds = None
            tExamples = trainExamples
        trainIdx, temp = DataUtils.FilterData(
            tExamples, details.filterVal, details.filterFrac, -1, indicesOnly=1)
        # examples removed by the filter are added to the test set
        tmp = [trainExamples[x] for x in trainIdx]
        testExamples += [trainExamples[x] for x in temp]
        trainExamples = tmp

        # sorted() is used because dict.keys() has no sort() on Python 3
        counts = DataUtils.CountResults(trainExamples, bounds=bounds)
        message('Result Counts in training set:')
        for k in sorted(counts.keys()):
            message(str((k, counts[k])))
        counts = DataUtils.CountResults(testExamples, bounds=bounds)
        message('Result Counts in test set:')
        for k in sorted(counts.keys()):
            message(str((k, counts[k])))

    nExamples = len(trainExamples)
    message('Training with %d examples' % (nExamples))

    # attributes are numbered from 1; those whose preceding entry in
    # nPossibleVals is -1 are not used for model building
    nVars = data.GetNVars()
    attrs = list(range(1, nVars + 1))
    nPossibleVals = data.GetNPossibleVals()
    for i in range(1, len(nPossibleVals)):
        if nPossibleVals[i - 1] == -1:
            attrs.remove(i)

    if details.pickleDataFileName != '':
        with open(details.pickleDataFileName, 'wb+') as pickleDataFile:
            cPickle.dump(trainExamples, pickleDataFile)
            cPickle.dump(testExamples, pickleDataFile)

    if details.bayesModel:
        composite = BayesComposite.BayesComposite()
    else:
        composite = Composite.Composite()

    # record the parameters of this run on the composite itself
    composite._randomSeed = seed
    composite._splitFrac = details.splitFrac
    composite._shuffleActivities = details.shuffleActivities
    composite._randomizeActivities = details.randomActivities

    if hasattr(details, 'filterFrac'):
        composite._filterFrac = details.filterFrac
    if hasattr(details, 'filterVal'):
        composite._filterVal = details.filterVal

    composite.SetModelFilterData(details.modelFilterFrac, details.modelFilterVal)

    composite.SetActivityQuantBounds(details.activityBounds)
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
        nPossibleVals[-1] = len(details.activityBounds) + 1

    if setDescNames:
        composite.SetInputOrder(data.GetVarNames())
        composite.SetDescriptorNames(details._descNames)
    else:
        composite.SetDescriptorNames(data.GetVarNames())
    composite.SetActivityQuantBounds(details.activityBounds)
    if details.nModels == 1:
        details.internalHoldoutFrac = 0.0

    if details.useTrees:
        from rdkit.ML.DecTree import CrossValidate, PruneTree
        if details.qBounds != []:
            from rdkit.ML.DecTree import BuildQuantTree
            builder = BuildQuantTree.QuantTreeBoot
        else:
            from rdkit.ML.DecTree import ID3
            builder = ID3.ID3Boot
        driver = CrossValidate.CrossValidationDriver
        pruner = PruneTree.PruneTree

        composite.SetQuantBounds(details.qBounds)
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        composite.Grow(
            trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
            buildDriver=driver, pruner=pruner, nTries=details.nModels,
            pruneIt=details.pruneIt, lessGreedy=details.lessGreedy,
            needsQuantization=0, treeBuilder=builder,
            nQuantBounds=details.qBounds, startAt=details.startAt,
            maxDepth=details.limitDepth, progressCallback=progressCallback,
            holdOutFrac=details.internalHoldoutFrac,
            replacementSelection=details.replacementSelection,
            recycleVars=details.recycleVars,
            randomDescriptors=details.randomDescriptors,
            silent=not _verbose)

    elif details.useSigTrees:
        from rdkit.ML.DecTree import CrossValidate
        from rdkit.ML.DecTree import BuildSigTree
        builder = BuildSigTree.SigTreeBuilder
        driver = CrossValidate.CrossValidationDriver
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        # these settings are optional on the details object
        biasList = getattr(details, 'sigTreeBiasList', None)
        useCMIM = getattr(details, 'useCMIM', 0)
        allowCollections = getattr(details, 'allowCollections', False)
        composite.Grow(
            trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
            buildDriver=driver, nTries=details.nModels, needsQuantization=0,
            treeBuilder=builder, maxDepth=details.limitDepth,
            progressCallback=progressCallback,
            holdOutFrac=details.internalHoldoutFrac,
            replacementSelection=details.replacementSelection,
            recycleVars=details.recycleVars,
            randomDescriptors=details.randomDescriptors,
            biasList=biasList, useCMIM=useCMIM,
            allowCollection=allowCollections, silent=not _verbose)

    elif details.useKNN:
        from rdkit.ML.KNN import CrossValidate
        from rdkit.ML.KNN import DistFunctions

        driver = CrossValidate.CrossValidationDriver
        dfunc = ''
        if details.knnDistFunc == "Euclidean":
            dfunc = DistFunctions.EuclideanDist
        elif details.knnDistFunc == "Tanimoto":
            dfunc = DistFunctions.TanimotoDist
        else:
            assert 0, "Bad KNN distance metric value"

        composite.Grow(
            trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
            buildDriver=driver, nTries=details.nModels, needsQuantization=0,
            numNeigh=details.knnNeighs,
            holdOutFrac=details.internalHoldoutFrac, distFunc=dfunc)

    elif details.useNaiveBayes or details.useSigBayes:
        from rdkit.ML.NaiveBayes import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        if not (hasattr(details, 'useSigBayes') and details.useSigBayes):
            composite.Grow(
                trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
                buildDriver=driver, nTries=details.nModels,
                needsQuantization=0, nQuantBounds=details.qBounds,
                holdOutFrac=details.internalHoldoutFrac,
                replacementSelection=details.replacementSelection,
                mEstimateVal=details.mEstimateVal, silent=not _verbose)
        else:
            useCMIM = getattr(details, 'useCMIM', 0)
            composite.Grow(
                trainExamples, attrs, nPossibleVals=[0] + nPossibleVals,
                buildDriver=driver, nTries=details.nModels,
                needsQuantization=0, nQuantBounds=details.qBounds,
                mEstimateVal=details.mEstimateVal, useSigs=True,
                useCMIM=useCMIM, holdOutFrac=details.internalHoldoutFrac,
                replacementSelection=details.replacementSelection,
                silent=not _verbose)

    # #     elif details.useSVM:
    # #       from rdkit.ML.SVM import CrossValidate
    # #       driver = CrossValidate.CrossValidationDriver
    # #       composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals,
    # #                      buildDriver=driver, nTries=details.nModels,
    # #                      needsQuantization=0,
    # #                      cost=details.svmCost,gamma=details.svmGamma,
    # #                      weights=details.svmWeights,degree=details.svmDegree,
    # #                      type=details.svmType,kernelType=details.svmKernel,
    # #                      coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu,
    # #                      cache_size=details.svmCache,shrinking=details.svmShrink,
    # #                      dataType=details.svmDataType,
    # #                      holdOutFrac=details.internalHoldoutFrac,
    # #                      replacementSelection=details.replacementSelection,
    # #                      silent=not _verbose)

    else:
        from rdkit.ML.Neural import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        composite.Grow(trainExamples, attrs, [0] + nPossibleVals,
                       nTries=details.nModels, buildDriver=driver,
                       needsQuantization=0)

    composite.AverageErrors()
    composite.SortModels()
    modelList, counts, avgErrs = composite.GetAllData()
    counts = numpy.array(counts)
    avgErrs = numpy.array(avgErrs)
    composite._varNames = data.GetVarNames()

    for model in modelList:
        model.NameModel(composite._varNames)

    # do final statistics: count-weighted mean error and mean absolute
    # deviation from it
    weightedErrs = counts * avgErrs
    averageErr = sum(weightedErrs) / sum(counts)
    devs = (avgErrs - averageErr)
    devs = devs * counts
    devs = numpy.sqrt(devs * devs)
    avgDev = sum(devs) / sum(counts)
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
            (100. * averageErr, 100. * avgDev))

    if details.bayesModel:
        composite.Train(trainExamples, verbose=0)

    # blow out the saved examples and then save the composite:
    composite.ClearModelExamples()
    if saveIt:
        composite.Pickle(details.outName)
    details.model = DbModule.binaryHolder(cPickle.dumps(composite))

    badExamples = []
    if not details.detailedRes and (not hasattr(details, 'noScreen') or
                                    not details.noScreen):
        if details.splitRun:
            message('Testing all hold-out examples')
            wrong = testall(composite, testExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(testExamples))))
            _runDetails.holdout_error = float(len(wrong)) / len(testExamples)
        else:
            message('Testing all examples')
            wrong = testall(composite, namedExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(namedExamples))))
            _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

    if details.detailedRes:
        message('\nEntire data set:')
        resTup = ScreenComposite.ShowVoteResults(
            range(data.GetNPts()), data, composite, nPossibleVals[-1],
            details.threshold)
        nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
        nPts = len(namedExamples)
        nClass = nGood + nBad
        _runDetails.overall_error = float(nBad) / nClass
        _runDetails.overall_correct_conf = avgGood
        _runDetails.overall_incorrect_conf = avgBad
        _runDetails.overall_result_matrix = repr(voteTab)
        # NOTE(review): if nGood/nBad are counts of classified points this
        # difference is never positive; presumably nPts - nClass was
        # intended - confirm against ShowVoteResults before changing.
        nRej = nClass - nPts
        if nRej > 0:
            _runDetails.overall_fraction_dropped = float(nRej) / nPts

        if details.splitRun:
            message('\nHold-out data:')
            resTup = ScreenComposite.ShowVoteResults(
                range(len(testExamples)), testExamples, composite,
                nPossibleVals[-1], details.threshold)
            nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
            nPts = len(testExamples)
            nClass = nGood + nBad
            _runDetails.holdout_error = float(nBad) / nClass
            _runDetails.holdout_correct_conf = avgGood
            _runDetails.holdout_incorrect_conf = avgBad
            _runDetails.holdout_result_matrix = repr(voteTab)
            # NOTE(review): same sign question as for the full data set above
            nRej = nClass - nPts
            if nRej > 0:
                _runDetails.holdout_fraction_dropped = float(nRej) / nPts

    if details.persistTblName and details.dbName:
        message('Updating results table %s:%s' % (details.dbName,
                                                  details.persistTblName))
        details.Store(db=details.dbName, table=details.persistTblName)

    if details.badName != '':
        with open(details.badName, 'w+') as badFile:
            # _wrong_ is only bound when the screening step above ran, which
            # is also the only time badExamples can be non-empty; index it
            # lazily inside the loop so an empty list never touches it
            for i, ex in enumerate(badExamples):
                vote = wrong[i]
                badFile.write('%s\t%s\n' % (ex, vote))

    composite.ClearModelExamples()
    return composite
t.TrainOnLine(examples, net, errTol=0.1, useAvgErr=0) print('classifications:') for example in examples: res = net.ClassifyExample(example[:-1]) print('%f -> %f' % (example[-1], res)) return net def runProfile(command): import random random.seed(23) import profile import pstats datFile = '%s.prof.dat' % (command) profile.run('%s()' % command, datFile) stats = pstats.Stats(datFile) stats.strip_dirs() stats.sort_stats('time').print_stats() if 0: net = testXor() print('Xor:', net) from rdkit.six.moves import cPickle outF = open('xornet.pkl', 'wb+') cPickle.dump(net, outF) outF.close() else: # runProfile('testLinear') net = testLinear() # net = testOr()
cmpd = Chem.MolFromSmiles('C1=CC=C1C#CC1=CC=C1') cmpd = Chem.AddHs(cmpd) AllChem.EmbedMolecule(cmpd) AllChem.UFFOptimizeMolecule(cmpd) AllChem.CanonicalizeMol(cmpd) print >> file('testmol.mol', 'w+'), Chem.MolToMolBlock(cmpd) else: cmpd = Chem.MolFromMolFile('testmol.mol') builder = SubshapeBuilder() if 1: shape = builder.GenerateSubshapeShape(cmpd) v = MolViewer() if 1: import tempfile tmpFile = tempfile.mktemp('.grd') v.server.deleteAll() Geometry.WriteGridToFile(shape.grid, tmpFile) time.sleep(1) v.ShowMol(cmpd, name='testMol', showOnly=True) v.server.loadSurface(tmpFile, 'testGrid', '', 2.5) v.server.resetCGO('*') cPickle.dump(shape, file('subshape.pkl', 'w+')) for i, pt in enumerate(shape.skelPts): v.server.sphere(tuple(pt.location), .5, (1, 0, 1), 'Pt-%d' % i) if not hasattr(pt, 'shapeDirs'): continue momBeg = pt.location - pt.shapeDirs[0] momEnd = pt.location + pt.shapeDirs[0] v.server.cylinder(tuple(momBeg), tuple(momEnd), .1, (1, 0, 1), 'v-%d' % i)
def FingerprintsFromDetails(details, reportFreq=10):
    """ Loads molecules as described by a details object, fingerprints them
    and optionally stores the fingerprints

    The input is taken from the first of these that applies:

      1) a database table (_details.dbName_ and _details.tableName_)

      2) a text file with SMILES (_details.inFileName_ and
         _details.useSmiles_)

      3) an SD file (_details.inFileName_ and _details.useSD_)

    **Arguments**

      - details: the details object. _details.idName_ is filled in if it
        is empty: with the first column name of the table for database
        input, with 'ID' for file input. The object's attribute dictionary
        is passed as keyword arguments to the fingerprinting function.

      - reportFreq: (optional) when reading an SD file, a message is
        printed every _reportFreq_ molecules; values <= 0 disable this

    **Returns**

      whatever the fingerprinting function returns (iterated as
      (ID, fingerprint) pairs when stored in a database), or None if no
      data could be loaded

    **Notes**

      - if _details.outFileName_ is set, each fingerprint entry is pickled
        to that file

      - if _details.outTableName_ and a database name (_details.outDbName_
        or _details.dbName_) are set, the fingerprints are inserted into
        that table, which is created if it is missing or if
        _details.replaceTable_ is set

    """
    data = None
    # initialised up front so a failed read leaves a well-defined "no data"
    # state instead of an unbound local
    dataSet = None
    # column layout of the (id, smiles) data loaded by the first two branches
    idCol = 0
    smiCol = 1
    if details.dbName and details.tableName:
        from rdkit.Dbase.DbConnection import DbConnect
        from rdkit.Dbase import DbInfo
        from rdkit.ML.Data import DataUtils
        try:
            conn = DbConnect(details.dbName, details.tableName)
        except Exception:
            import traceback
            error('Problems establishing connection to database: %s|%s\n' %
                  (details.dbName, details.tableName))
            traceback.print_exc()
        if not details.idName:
            details.idName = DbInfo.GetColumnNames(details.dbName,
                                                   details.tableName)[0]
        dataSet = DataUtils.DBToData(
            details.dbName, details.tableName,
            what='%s,%s' % (details.idName, details.smilesName))
    elif details.inFileName and details.useSmiles:
        from rdkit.ML.Data import DataUtils
        if not details.idName:
            details.idName = 'ID'
        try:
            dataSet = DataUtils.TextFileToData(
                details.inFileName,
                onlyCols=[details.idName, details.smilesName])
        except IOError:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
    elif details.inFileName and details.useSD:
        if not details.idName:
            details.idName = 'ID'
        dataSet = []
        try:
            s = Chem.SDMolSupplier(details.inFileName)
        except Exception:
            import traceback
            error('Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
        else:
            while 1:
                try:
                    # the next() builtin works on both Python 2 and 3
                    m = next(s)
                except StopIteration:
                    break
                # entries that could not be parsed are skipped
                if m:
                    dataSet.append(m)
                    if reportFreq > 0 and not len(dataSet) % reportFreq:
                        message('Read %d molecules\n' % (len(dataSet)))
                    # checked for every molecule so that maxMols is honoured
                    # independently of the reporting frequency
                    if details.maxMols > 0 and len(dataSet) >= details.maxMols:
                        break

            # pair each molecule with its name: the id property if present,
            # the _Name property otherwise
            for i, mol in enumerate(dataSet):
                if mol.HasProp(details.idName):
                    nm = mol.GetProp(details.idName)
                else:
                    nm = mol.GetProp('_Name')
                dataSet[i] = (nm, mol)

    fps = None
    if dataSet and not details.useSD:
        data = dataSet.GetNamedData()
        if not details.molPklName:
            fps = FingerprintsFromSmiles(data, idCol, smiCol, **details.__dict__)
        else:
            fps = FingerprintsFromPickles(data, idCol, smiCol, **details.__dict__)
    elif dataSet and details.useSD:
        fps = FingerprintsFromMols(dataSet, **details.__dict__)

    if fps:
        if details.outFileName:
            with open(details.outFileName, 'wb+') as outF:
                for entry in fps:
                    cPickle.dump(entry, outF)
        dbName = details.outDbName or details.dbName
        if details.outTableName and dbName:
            from rdkit.Dbase.DbConnection import DbConnect
            from rdkit.Dbase import DbUtils, DbModule
            conn = DbConnect(dbName)
            #
            #  We don't have a db open already, so we'll need to figure out
            #    the types of our columns...
            #
            # NOTE(review): data is only populated for non-SD input; with SD
            # input it is still None here - confirm whether database output
            # is meant to be supported in that case.
            colTypes = DbUtils.TypeFinder(data, len(data), len(data[0]))
            typeStrs = DbUtils.GetTypeStrings(
                [details.idName, details.smilesName], colTypes,
                keyCol=details.idName)
            cols = '%s, %s %s' % (typeStrs[0], details.fpColName,
                                  DbModule.binaryTypeName)

            # FIX: we should really check to see if the table
            #  is already there and, if so, add the appropriate
            #  column.

            #
            # create the new table
            #
            if details.replaceTable or \
               details.outTableName.upper() not in [x.upper() for x in conn.GetTableNames()]:
                conn.AddTable(details.outTableName, cols)

            #
            # And add the data
            #
            for ID, fp in fps:
                tpl = ID, DbModule.binaryHolder(fp.ToBinary())
                conn.InsertData(details.outTableName, tpl)
            conn.Commit()
    return fps
def GenRandomExamples(nVars=10, randScale=0.3, bitProb=0.5, nExamples=500, seed=(0, 0),
                      addResults=1):
    """ Generates a reproducible set of random boolean examples

    Each example is a list of _nVars_ random bits. Its result is True when
    the sum of the weights of the bits that are set is at least 1.

    **Arguments**

      - nVars: the number of variables (bits) per example

      - randScale: scale factor applied to the random per-variable weights

      - bitProb: a bit is set when a uniform random number exceeds this value

      - nExamples: the number of examples to generate

      - seed: a 2-tuple; only its first element is used to seed the random
        number generator

      - addResults: if nonzero, the result is appended to each example

    **Returns**

      a 3-tuple:

        1) the list of examples

        2) the list of attribute indices, 0 .. nVars-1

        3) a list of possible-value counts (all 2)

    """
    random.seed(seed[0])
    varWeights = numpy.array([random.random() for _ in range(nVars)]) * randScale
    examples = []
    for _ in range(nExamples):
        varVals = [random.random() > bitProb for _ in range(nVars)]
        # only the weights of the bits that are set contribute to the sum
        res = sum(numpy.array(varVals) * varWeights)
        if addResults:
            varVals.append(res >= 1.)
        examples.append(varVals)

    # NOTE(review): the length is tied to nExamples rather than nVars;
    # presumably nVars + 1 was intended - confirm against callers.
    nPossibleVals = [2] * (nExamples + 1)
    attrs = list(range(nVars))

    return (examples, attrs, nPossibleVals)


if __name__ == '__main__':  # pragma: nocover
    from rdkit.six.moves import cPickle
    examples, attrs, nPossibleVals = GenRandomExamples()
    # the three objects are pickled back to back into the same file
    with open('random.dat.pkl', 'wb+') as outF:
        cPickle.dump(examples, outF)
        cPickle.dump(attrs, outF)
        cPickle.dump(nPossibleVals, outF)
    tree = ID3.ID3Boot(examples, attrs, nPossibleVals)
    tree.Pickle('save.pkl')
cmpd = Chem.MolFromSmiles('C1=CC=C1C#CC1=CC=C1') cmpd = Chem.AddHs(cmpd) AllChem.EmbedMolecule(cmpd) AllChem.UFFOptimizeMolecule(cmpd) AllChem.CanonicalizeMol(cmpd) print >>file('testmol.mol','w+'),Chem.MolToMolBlock(cmpd) else: cmpd = Chem.MolFromMolFile('testmol.mol') builder=SubshapeBuilder() if 1: shape=builder.GenerateSubshapeShape(cmpd) v = MolViewer() if 1: import tempfile tmpFile = tempfile.mktemp('.grd') v.server.deleteAll() Geometry.WriteGridToFile(shape.grid,tmpFile) time.sleep(1) v.ShowMol(cmpd,name='testMol',showOnly=True) v.server.loadSurface(tmpFile,'testGrid','',2.5) v.server.resetCGO('*') cPickle.dump(shape,file('subshape.pkl','w+')) for i,pt in enumerate(shape.skelPts): v.server.sphere(tuple(pt.location),.5,(1,0,1),'Pt-%d'%i) if not hasattr(pt,'shapeDirs'): continue momBeg = pt.location-pt.shapeDirs[0] momEnd = pt.location+pt.shapeDirs[0] v.server.cylinder(tuple(momBeg),tuple(momEnd),.1,(1,0,1),'v-%d'%i)
# create a logged canvas and draw on it
sz = (300, 300)
c1 = Logger.Logger(pidSVG.SVGCanvas, sz, 'foo.svg', loggerFlushCommand='clear')
c1.drawPolygon([(100, 100), (100, 200), (200, 200), (200, 100)], fillColor=pid.Color(0, 0, 1))
c1.drawLines([(100, 100, 200, 200), (100, 200, 200, 100)], color=pid.Color(0, 1, 0), width=2)

# because the log has been instantiated with clear() as the loggerFlushCommand,
# this will blow out the log as well as the contents of the canvas.
c1.clear()

# draw some more stuff
c1.drawPolygon([(100, 100), (100, 200), (200, 200), (200, 100)], fillColor=pid.Color(1, 0, 0))
c1.drawLines([(100, 100, 200, 200), (100, 200, 200, 100)], color=pid.Color(0, 0, 0), width=2)
# and write the resulting file.
c1.save()

# save the log by pickling it.
from rdkit.six.moves import cPickle
# close the file explicitly so the pickle is fully on disk before it is
# re-opened for reading below
with open('foo.pkl', 'wb+') as outF:
    cPickle.dump(c1._LoggerGetLog(), outF)

# create a new canvas
c2 = pidPIL.PILCanvas(sz, 'foo.png')
# read the pickled log back in
with open('foo.pkl', 'rb') as inF:
    t = cPickle.load(inF)
# and play the log on the new canvas
Logger.replay(t, c2)
# there should now be a file 'foo.png' with the image