def predict(self,**kw):
    """Classify the prediction samples and export per-sample lineage tables.

    Steps, in order:
      1. Copy the classifier options from ``self.opt.prOpt.clOpt``, force
         in-process execution and point ``modelRoot`` at the ``train/1``
         sub-directory of the training feature store.
      2. Open the sample store at ``opt.sampStorePred``, load the feature
         store named ``opt.featName`` from it and run its ``predict`` under
         the run name ``prOpt.runName``.
      3. Map each predicted label to a name through the id-label object
         loaded from ``self.store``; names that are not strings are passed
         to the taxonomy tree and replaced by the node's lineage string.
      4. Write two tab-separated files into ``prOpt.outExportDir`` when that
         option is set, otherwise into the prediction store itself:
           - ``idlin.csv``:  one ``id<TAB>lineage`` row per sample, sorted
             by (id, lineage) pairs;
           - ``lincnt.csv``: one ``lineage<TAB>count`` row per distinct
             lineage, ordered by decreasing count.

    ``**kw`` is accepted but not read by this method.

    Raises AssertionError when ``clOpt.thresh`` does not hold exactly one
    element; only the first row of ``pred.labPred`` is used below.
    """
    from MGT.Taxa import loadTaxaTree

    opt = self.opt
    prOpt = opt.prOpt
    store = self.store

    # Work on a copy so that the shared option object is left untouched.
    clOpt = prOpt.clOpt.copy()
    assert len(clOpt.thresh) == 1
    #we would need to split this into two methods for "batchDep"
    clOpt.runMode = "inproc"
    featStoreTrain = self.getFeatStore(opt.featName)
    clOpt.modelRoot = pjoin(featStoreTrain.getPath(),"train/1",clOpt.modelRoot)

    sampStorePred = SampStore.open(path=opt.sampStorePred)
    featStore = sampStorePred.loadStore(name=opt.featName)
    featStore.predict(opt=clOpt,name=prOpt.runName)

    #nameIL = store.getIdLabsName()+'.tr'
    nameIL = None
    idLab = store.loadIdLabs(name=nameIL)
    labToName = idLab.getLabToName()

    predStore = featStore.predictStore(name=prOpt.runName)
    pred = predStore.loadObj("pred")

    if prOpt.get("outExportDir",None) is not None:
        #"c" can zap cwd of the user
        outStore = DirStore.open(path=prOpt.outExportDir,mode="w")
    else:
        outStore = predStore

    # A sample id that occurs more than once keeps its last predicted label.
    idToName = {}
    for sampId,lab in zip(pred.idPred["id"],pred.labPred[0]):
        idToName[sampId] = labToName[lab]

    taxaTree = loadTaxaTree()
    idToLin = {}
    for sampId,labName in idToName.items():
        if not isinstance(labName,str):
            # NOTE(review): non-string names are presumably taxonomy ids
            # understood by taxaTree.getNode() - confirm against getLabToName().
            idToLin[sampId] = taxaTree.getNode(labName).lineageStr()
        else:
            idToLin[sampId] = labName

    # 'with' guarantees the file is closed even if a write raises.
    with open(outStore.getFilePath("idlin.csv"),'w') as out:
        for (sampId,lin) in sorted(idToLin.items()):
            out.write("%s\t%s\n" % (sampId,lin))

    # NOTE(review): binCount() is assumed to return a value->count mapping;
    # only its .items() pairs are used here. The text is built before the
    # file is opened so a failure here does not leave an empty file behind.
    linCnt = "\n".join(["%s\t%s" % y for y in
                        sorted(binCount(idToLin.values()).items(),key=lambda x:-x[1])])
    with open(outStore.getFilePath("lincnt.csv"),'w') as out:
        out.write(linCnt)
        out.write("\n")
def getTaxaTree(self):
    """Return the taxonomy tree held in ``self.taxaTree``, loading it on first use.

    When ``self.taxaTree`` is None the tree is obtained from
    ``loadTaxaTree()`` and stored on the instance, so later calls return
    the stored object without loading again.
    """
    cached = self.taxaTree
    if cached is not None:
        return cached
    self.taxaTree = loadTaxaTree()
    return self.taxaTree