def completeTest(self, treelikerArgs, processes=1):
    """Run the full cross-validated learning pipeline.

    Generates the unified example datasets, runs the per-term TreeLiker
    test for every ontology term (optionally across worker processes),
    builds one Bayesian network per (fold, classifier) pair from the
    learned results, and finally saves ROC plots (raw vs. Bayes-corrected)
    into each term's directory.

    :param treelikerArgs: positional arguments forwarded to TreeLikerWrapper
    :param processes: number of worker processes for the per-term runs

    Improvements over the original: removed the dead local
    ``bestClassifiers`` (assigned but never read) and deleted
    commented-out code.
    """
    self.generateExamplesUnified()
    # Depth ordering matters: the bnet learning below relies on it
    # (original note: "This sorting is needed later in bnet learning").
    terms = self.termsByDepth()
    treeliker = TreeLikerWrapper(self, *treelikerArgs)

    def processTerm(term):
        # Worker body: pair each term with its TreeLiker test result.
        return term, treeliker.runTermTest(term)

    nets = defaultdict(dict)  # fold index -> classifier name -> BayesNet
    allresults = tuple(parallel_map_dill(processes, processTerm, terms))

    # Collect every (term, classifier) combination and feed each term's
    # CPD into the matching per-fold network, creating networks lazily.
    combis = set()
    for term, learned in allresults:
        for clfName, i in learned:
            combis.add((term, clfName))
            if clfName in nets[i]:
                net = nets[i][clfName]
            else:
                net = BayesNet(i, clfName, self)
                nets[i][clfName] = net
            net.generateCPD(term)

    # Finalize and evaluate each network once all CPDs are in place.
    for i, byClf in sorted(nets.items()):
        for clfName, net in byClf.items():
            net.bake()
            net.predict()

    debug("Generating plots.")
    plt.figure(figsize=(6, 12))
    for term, clfName in combis:
        plt.clf()
        termN = self[term]['name']
        cvdir = getTermPath(termN)
        # Top subplot: plain per-term ROC.
        s1 = plt.subplot(211, adjustable='box', aspect=1)
        s1.axis('equal')
        plotRoc(termN, clfName, termN)
        # Bottom subplot: ROC after Bayesian-network correction; the
        # corrected classifiers are supplied lazily, one per fold.
        s2 = plt.subplot(212, adjustable='box', aspect=1)
        s2.axis('equal')
        plotRoc(termN, clfName, "Bayes correction",
                clfs=(nets[i][clfName].nodeAsClf(term) for i in range(NUM_FOLDS)))
        print(str(cvdir / (clfName.replace(" ", "_") + '_roc.png')))
        plt.savefig(str(cvdir / (clfName.replace(" ", "_") + '_roc.png')))
        plt.savefig(str(cvdir / (clfName.replace(" ", "_") + '_roc.ps')))
    debug("Finished complete test.")
def completeTest(self, treelikerArgs, processes = 1):
    """Run the full cross-validated learning pipeline.

    Generates unified datasets, runs the per-term TreeLiker test for
    every ontology term (optionally in parallel), builds one Bayesian
    network per (fold, classifier) pair, then saves ROC plots (raw and
    Bayes-corrected) into each term's directory.

    :param treelikerArgs: positional arguments for TreeLikerWrapper
    :param processes: worker processes for the parallel per-term runs
    """
    self.generateExamplesUnified()
    bestClassifiers = []  # NOTE(review): assigned but never read afterwards
    terms = self.termsByDepth() # This sorting is needed later in bnet learning
    treeliker = TreeLikerWrapper(self, *treelikerArgs)
    def processTerm(term):
        # Worker body: pair each term with its TreeLiker test result.
        return term, treeliker.runTermTest(term)
    nets = defaultdict(dict)  # fold index -> classifier name -> BayesNet
    allresults = tuple(parallel_map_dill(processes, processTerm, terms))
    combis = set()  # every (term, classifier-name) pair seen in the results
    for term, learned in allresults:
        for clfName, i in learned:
            combis.add((term,clfName))
            #for clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation in folds:
            # Create the per-fold network lazily on first sight of clfName.
            if clfName in nets[i]:
                net = nets[i][clfName]
            else:
                net = BayesNet(i, clfName, self)
                nets[i][clfName] = net
            net.generateCPD(term)#, clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation)
    # Finalize and evaluate every network once all CPDs are in place.
    for i, byClf in sorted(nets.items()):
        for clfName, net in byClf.items():
            net.bake()
            net.predict()
    debug("Generating plots.")
    #for term, learned in allresults:
    #    for clfName, folds in learned.items():
    plt.figure(figsize = (6,12))
    for term,clfName in combis:
        plt.clf()
        termN = self[term]['name']
        cvdir = getTermPath(termN)
        #folds2 = [(nets[i][clfName].nodeAsClf(term),)+f[1:] for i,f in enumerate(folds)]
        # Top subplot: plain per-term ROC.
        s1 = plt.subplot(211, adjustable='box', aspect=1)
        s1.axis('equal')
        #s1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plotRoc(termN, clfName, termN)
        # Bottom subplot: ROC after Bayesian-network correction,
        # classifiers supplied lazily, one per fold.
        s2 = plt.subplot(212, adjustable='box', aspect=1)
        s2.axis('equal')
        #s2.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plotRoc(termN, clfName, "Bayes correction", clfs = (nets[i][clfName].nodeAsClf(term) for i in range(NUM_FOLDS) ))
        #plotRoc("Bayes correction", folds2)
        print(str(cvdir/(clfName.replace(" ","_")+'_roc.png')))
        plt.savefig(str(cvdir/(clfName.replace(" ","_")+'_roc.png')))
        plt.savefig(str(cvdir/(clfName.replace(" ","_")+'_roc.ps')))
    debug("Finished complete test.")
def runTermTest(self, term):
    """Prepare and execute a cross-validated TreeLiker run for one term.

    Builds a batch file containing the global TreeLiker settings followed
    by one work unit per cross-validation fold (each with its own output
    subdirectory), runs TreeLiker on it, and returns the result of
    ``learningTest`` over the term's result directory.
    """
    term = self.ontology[term]['name']
    debug("Preparing for TreeLiker on term %s." % term)
    resultPath = getTermPath(term)
    batchPath = resultPath / 'batch.treeliker'
    datasetPath = resultPath / 'dataset.txt'

    # Global settings rendered once at the top of the batch file.
    header = ("set(algorithm, relf_grounding_counting)\n"
              "set(verbosity, %d)\n"
              "set(output_type, train_test)\n"
              "set(examples, '%s')\n"
              "set(template, [%s])\n"
              "set(use_sampling, true)\n"
              "set(num_samples, %d)\n"
              "set(sample_size, %d)\n"
              "set(covered_class, '%s')\n\n") % (
                  dp.utils.verbosity,
                  datasetPath.name,
                  self.template,
                  self.samples,
                  self.sample_size,
                  term)

    # One example per line in the dataset file.
    with datasetPath.open() as handle:
        exampleCount = sum(1 for _ in handle)

    sections = [header]
    for foldIdx, (train, test) in enumerate(
            cross_validation.KFold(exampleCount, NUM_FOLDS)):
        foldDir = resultPath / str(foldIdx)
        if not foldDir.is_dir():
            foldDir.mkdir()
        sections.append("set(output, '%s')\n"
                        "set(train_set, [%s])\n"
                        "set(test_set, [%s])\n"
                        "work(yes)\n" % (
                            foldDir.name,
                            ",".join(map(str, train)),
                            ",".join(map(str, test))))

    with batchPath.open('w') as bf:
        bf.write("".join(sections))

    self._runTreeLiker(resultPath, batchPath)
    return learningTest(resultPath)
def runTermTest(self, term):
    """Prepare and execute a cross-validated TreeLiker run for one term.

    Writes a batch file with global settings plus one work unit per
    cross-validation fold, invokes TreeLiker on it, and returns the
    result of ``learningTest`` over the term's result directory.
    """
    term = self.ontology[term]['name']
    debug("Preparing for TreeLiker on term %s." % term)
    resultPath = getTermPath(term)
    batchPath = resultPath / 'batch.treeliker'
    datasetPath = resultPath / 'dataset.txt'
    # Global TreeLiker settings, rendered once at the top of the batch file.
    batchFile = "set(algorithm, relf_grounding_counting)\n" \
                "set(verbosity, %d)\n" \
                "set(output_type, train_test)\n" \
                "set(examples, '%s')\n" \
                "set(template, [%s])\n" \
                "set(use_sampling, true)\n" \
                "set(num_samples, %d)\n" \
                "set(sample_size, %d)\n" \
                "set(covered_class, '%s')\n\n" % (
                    dp.utils.verbosity,
                    datasetPath.name,
                    self.template,
                    self.samples,
                    self.sample_size,
                    term)
    with datasetPath.open() as ds:
        dataSetLen = len([*ds]) # Counts lines
    # Append one work unit per fold, each writing into its own
    # numbered subdirectory of the term's result path.
    for i, (train, test) in enumerate(cross_validation.KFold(dataSetLen, NUM_FOLDS)):
        path = resultPath / str(i)
        if not path.is_dir():
            path.mkdir()
        batchFile += "set(output, '%s')\n" \
                     "set(train_set, [%s])\n" \
                     "set(test_set, [%s])\n" \
                     "work(yes)\n" % (
                         path.name,
                         ",".join(map(str,train)),
                         ",".join(map(str,test)))
    with batchPath.open('w') as bf:
        bf.write(batchFile)
    self._runTreeLiker(resultPath, batchPath)
    return learningTest(resultPath)
def generateExamplesUnified(self):
    """Write one unified dataset file per ontology term.

    For every gene, its logical representation is appended to every
    term's ``dataset.txt``; the class label is the term name, prefixed
    with ``~`` when the gene is not associated with that term.
    """
    debug("Generating unified datasets.")
    terms = self.termsByDepth(False)
    with ExitStack() as stack:  # Closes all files when exited
        # Open one output file per term name up front.
        openFiles = []
        for termName in (self[t]['name'] for t in self.ontology.keys()):
            out = stack.enter_context(
                (getTermPath(termName) / 'dataset.txt').open('w'))
            openFiles.append((termName, out))
        for geneName in self.genes:
            gene = self.geneFactory.getGene(geneName)
            representation = ", ".join(gene.logicalRepresentation())
            for termName, out in openFiles:
                # '~'-prefixed label marks a negative example for this term.
                label = (termName if geneName in self.associations[termName]
                         else '~' + termName)
                line = '"%s" %s' % (label, representation)
                print(line, file=out)
def generateExamplesUnified(self):
    """Write one unified dataset file per ontology term.

    For every gene, its logical representation is appended to every
    term's ``dataset.txt``; the class label is the term name, prefixed
    with ``~`` when the gene is not associated with that term.
    """
    #return
    debug("Generating unified datasets.")
    terms = self.termsByDepth(False)  # NOTE(review): result unused here
    #rootname = self.ontology[self.root]['name']
    with ExitStack() as stack: # Closes all files when exited
        # One open output file per term name, kept open for the whole run.
        files = [ (term, stack.enter_context(
                       (getTermPath(term) / 'dataset.txt').open('w')))
                  for term in (self[t]['name'] for t in self.ontology.keys()) ] #if term != rootname]
        #for i, geneName in enumerate(self.genes):
        for geneName in self.genes:
            #debug("%d. Writing gene %s." % (i, geneName))
            gene = self.geneFactory.getGene(geneName)
            repg = ", ".join(gene.logicalRepresentation())
            for term, output in files:
                # '~'-prefixed label marks a negative example for this term.
                if geneName not in self.associations[term]:
                    term = '~' + term
                e = '"%s" %s' % (term, repg)
                print(e, file=output)