def procScoresRefAgainstRef(jobs):
    """Run ImmClassifierApp in "proc-scores" mode on the reference results.

    Starts from a copy of the module-level optTpl and points both the
    combined score file (<cwd>/results/combined.score) and the prediction
    output directory (<cwd>/results) at the same "results" directory.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    opt = optTpl.copy()
    opt.mode = "proc-scores"
    opt.outScoreComb = pjoin(opt.cwd, "results", "combined.score")
    opt.predOutDir = pjoin(opt.cwd, "results")
    imm = ImmClassifierApp(opt=opt)
    # Hand back the jobs created by this stage (as trainRef and the other
    # stage helpers do) so that whatever is chained next depends on this
    # run and not on the jobs that were passed in.
    jobs = imm.run(depend=jobs)
    run_makeflow_if(opt)
    return jobs
def predict(self):
    """Start one "predict" run of ImmClassifierApp per sample sub-directory.

    For each name in self.sampSubDirs a work directory is created under
    self.topPredDir. The sample's velvet/contigs.fa.gz is passed through
    filterFastaByLength() into pred_inp.fna, contigReadCountVelvet() writes
    samp.attr.csv, and ImmClassifierApp is run in "batchDep" run mode with
    those two files as input.

    The process working directory is switched to the per-sample work
    directory for the duration of each iteration and is always restored to
    self.topWorkDir afterwards.

    @return: concatenated list of the jobs returned by every app.run() call
    """
    topSampDir = "/usr/local/projects/GOS3/Tier2/sequencing_technology_comparison/assembly_comparison"
    topAsmDir = topSampDir
    # Assembly parameters forwarded to contigReadCountVelvet().
    asmKmerSize = 31
    asmReadLen = 100
    sampDirs = [pjoin(topAsmDir, subDir) for subDir in self.sampSubDirs]
    makedir(self.topPredDir)
    jobsFin = []
    for sampDir in sampDirs:
        d = os.path.basename(sampDir)
        workDir = pjoin(self.topPredDir, d)
        makedir(workDir)
        try:
            os.chdir(workDir)
            asmDir = pjoin(sampDir, "velvet")
            inpFastaOrig = pjoin(asmDir, "contigs.fa.gz")
            inpFastaPred = pjoin(workDir, "pred_inp.fna")
            filterFastaByLength(inpFastaOrig, inpFastaPred,
                                minLen=300, lineLen=1000)
            sampAttr = pjoin(workDir, "samp.attr.csv")
            # The context manager closes the attribute file even when the
            # read counting step raises.
            with open(sampAttr, "w") as outCnt:
                contigReadCountVelvet(contFasta=inpFastaPred,
                                      kmerSize=asmKmerSize,
                                      readLen=asmReadLen,
                                      out=outCnt)
            opt = Struct()
            opt.runMode = "batchDep"  # alternative: "inproc"
            opt.inpSeq = inpFastaPred
            opt.predMinLenSamp = 300
            opt.sampAttrib = sampAttr
            opt.predOutDir = pjoin(workDir, "results")
            opt.lrmUserOptions = '-P 9223'
            opt.mode = "predict"  # alternative: "export-predictions"
            ImmClassifierApp.fillWithDefaultOptions(opt)
            # Each sample starts with no dependencies of its own.
            jobs = []
            app = ImmClassifierApp(opt=opt)
            jobs = app.run(depend=jobs)
            jobsFin += jobs
        finally:
            os.chdir(self.topWorkDir)
    return jobsFin
def scoreRefAgainstRef(jobs):
    """Run ImmClassifierApp in "score" mode for the reference input.

    Starts from a copy of the module-level optTpl, uses <cwd>/imm as the
    only immDb entry, <seqDbPath1>/195.fasta.gz as the input sequence and
    <cwd>/results/combined.score as the combined score output.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    opt = optTpl.copy()
    opt.mode = "score"
    opt.immDb = [pjoin(opt.cwd, "imm")]
    opt.inpSeq = pjoin(seqDbPath1, "195.fasta.gz")
    opt.outScoreComb = pjoin(opt.cwd, "results", "combined.score")
    imm = ImmClassifierApp(opt=opt)
    # Hand back the jobs created by this stage (as trainRef and the other
    # stage helpers do) so that whatever is chained next depends on this
    # run and not on the jobs that were passed in.
    jobs = imm.run(depend=jobs)
    run_makeflow_if(opt)
    return jobs
def trainRef(jobs):
    """Run ImmClassifierApp in "train" mode with the reference paths.

    The options are a copy of optTpl with immDb set to [<cwd>/imm] and
    seqDb set to <cwd>/seqdb, completed by fillWithDefaultOptions() and
    printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "train"
    baseDir = options.cwd
    options.immDb = [pjoin(baseDir, "imm")]
    options.seqDb = pjoin(baseDir, "seqdb")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def procScoresCustomAgainstJoint(jobs):
    """Run ImmClassifierApp in "proc-scores" mode on the "92830.1.join" files.

    The options are a copy of optTpl with outScoreComb set to
    <cwd>/92830.1.join.combined.score and predOutDir set to
    <cwd>/92830.1.join.results, completed by fillWithDefaultOptions() and
    printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "proc-scores"
    baseDir = options.cwd
    options.outScoreComb = pjoin(baseDir, "92830.1.join.combined.score")
    options.predOutDir = pjoin(baseDir, "92830.1.join.results")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def scoreCustomWithParentAgainstJoint(jobs):
    """Run ImmClassifierApp in "score" mode against two immDb entries.

    immDb lists <cwd>/imm followed by <cwd>/custom_with_parent.immdb; the
    input is <seqDbPath2>/custom_with_parent.fasta.gz and the combined
    score goes to <cwd>/custom_with_parent.join.combined.score. Options are
    completed by fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "score"
    baseDir = options.cwd
    refDb = pjoin(baseDir, "imm")
    customDb = pjoin(baseDir, "custom_with_parent.immdb")
    options.immDb = [refDb, customDb]
    options.inpSeq = pjoin(seqDbPath2, "custom_with_parent.fasta.gz")
    options.outScoreComb = pjoin(
        baseDir, "custom_with_parent.join.combined.score")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def makeSeqDbCustom(jobs):
    """Run ImmClassifierApp in "make-ref-seqdb" mode for the custom input.

    Training sequences and their model description come from
    <seqDbPath2>/generic.mod.train.fasta.gz and
    <seqDbPath2>/generic.mod.train.json, the input format is "generic", and
    seqDb is set to <cwd>/92830.seqdb. Options are completed by
    fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "make-ref-seqdb"
    trainFasta = pjoin(seqDbPath2, "generic.mod.train.fasta.gz")
    options.inpTrainSeq = trainFasta
    trainDescr = pjoin(seqDbPath2, "generic.mod.train.json")
    options.inpTrainModelDescr = trainDescr
    options.inpTrainSeqFormat = "generic"
    options.seqDb = pjoin(options.cwd, "92830.seqdb")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def predict(self):
    """Start one "predict" run of ImmClassifierApp per sample assembly.

    For each name in self.sampSubDirs (resolved under the "assembly"
    directory of the hard-coded sample tree) a work directory is created
    under self.topPredDir, contigReadCount454() writes samp.attr.csv there,
    and ImmClassifierApp is run in "batchDep" run mode on the sample's
    454AllContigs.fna with immDb set to $GOSII_WORK/icm-refseq.

    The process working directory is switched to the per-sample work
    directory for the duration of each iteration and is always restored to
    self.topWorkDir afterwards.

    @return: concatenated list of the jobs returned for every sample
    @raise KeyError: if the GOSII_WORK environment variable is not set
    """
    topSampDir = "/usr/local/depot/projects/GOS/baltic"
    topAsmDir = pjoin(topSampDir, "assembly")
    sampDirs = [pjoin(topAsmDir, subDir) for subDir in self.sampSubDirs]
    makedir(self.topPredDir)
    jobsFin = []
    icmDbRef = pjoin(os.environ["GOSII_WORK"], "icm-refseq")
    for sampDir in sampDirs:
        d = os.path.basename(sampDir)
        workDir = pjoin(self.topPredDir, d)
        makedir(workDir)
        try:
            os.chdir(workDir)
            sampAttr = pjoin(workDir, "samp.attr.csv")
            # The context manager closes the attribute file even when the
            # read counting step raises.
            with open(sampAttr, "w") as outCnt:
                contigReadCount454(asmDir=sampDir, out=outCnt)
            opt = Struct()
            opt.runMode = "batchDep"  # alternative: "inproc"
            opt.immDb = icmDbRef
            opt.inpSeq = pjoin(sampDir, "454AllContigs.fna")
            opt.sampAttrib = sampAttr
            opt.predMinLenSamp = 1000
            # Each sample starts with no dependencies of its own; modes are
            # chained so that every mode depends on the previous one.
            jobs = []
            for mode in ("predict",):
                opt.mode = mode
                app = ImmClassifierApp(opt=opt)
                jobs = app.run(depend=jobs)
            jobsFin += jobs
        finally:
            # Use the instance attribute: a bare `topWorkDir` is not bound
            # anywhere in this method.
            os.chdir(self.topWorkDir)
    return jobsFin
def makeSeqDbRef(jobs):
    """Run ImmClassifierApp in "make-ref-seqdb" mode for the reference input.

    Training sequences are given as the pattern <seqDbPath1>/*.fasta.gz in
    "ncbi" format; immDb is [<cwd>/imm] and seqDb is <cwd>/seqdb. Options
    are completed by fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "make-ref-seqdb"
    options.inpTrainSeq = pjoin(seqDbPath1, "*.fasta.gz")
    options.inpTrainSeqFormat = "ncbi"
    baseDir = options.cwd
    options.immDb = [pjoin(baseDir, "imm")]
    options.seqDb = pjoin(baseDir, "seqdb")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def procScoresRefAgainstCustom(jobs, inpIsSeqDb=False):
    """Run ImmClassifierApp in "proc-scores" mode on the "92830" files.

    outScoreComb is <cwd>/92830.combined.score and predOutDir is
    <cwd>/92830.results. Options are completed by fillWithDefaultOptions()
    and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @param inpIsSeqDb: when true, sampAttrib is set to None; otherwise it
        is set to <seqDbPath1>/195.immClassifier.attrib.csv
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "proc-scores"
    baseDir = options.cwd
    options.outScoreComb = pjoin(baseDir, "92830.combined.score")
    options.predOutDir = pjoin(baseDir, "92830.results")
    options.sampAttrib = (
        None if inpIsSeqDb
        else pjoin(seqDbPath1, "195.immClassifier.attrib.csv")
    )
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def scoreRefAgainstCustom(jobs, inpIsSeqDb=False):
    """Run ImmClassifierApp in "score" mode against <cwd>/92830.immdb.

    The combined score goes to <cwd>/92830.combined.score. Options are
    completed by fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @param inpIsSeqDb: when true, inpSeq is <cwd>/92830.seqdb; otherwise it
        is <seqDbPath1>/195.fasta.gz
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "score"
    baseDir = options.cwd
    options.immDb = [pjoin(baseDir, "92830.immdb")]
    options.inpSeq = (
        pjoin(baseDir, "92830.seqdb") if inpIsSeqDb
        else pjoin(seqDbPath1, "195.fasta.gz")
    )
    options.outScoreComb = pjoin(baseDir, "92830.combined.score")
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def trainCustom(jobs):
    """Run ImmClassifierApp in "train" mode on the "92830" sequence db.

    seqDb is <cwd>/92830.seqdb, immDb is [<cwd>/92830.immdb],
    trainMinLenSamp is 1, and the stdout/stderr options are set to
    "stdout.log" and "stderr.log". Options are completed by
    fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "train"
    baseDir = options.cwd
    options.seqDb = pjoin(baseDir, "92830.seqdb")
    options.immDb = [pjoin(baseDir, "92830.immdb")]
    options.trainMinLenSamp = 1
    options.stdout = "stdout.log"
    options.stderr = "stderr.log"
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
def trainCustomWithParent(jobs):
    """Run ImmClassifierApp in "train" mode on the "custom_with_parent" set.

    Training input is <seqDbPath2>/custom_with_parent.fasta.gz. The seqDb,
    taxaTreePkl and immDb options all point at "custom_with_parent.*" paths
    under <cwd>; trainMinLenSamp is 1 and the stdout/stderr options are set
    to "stdout.log" and "stderr.log". Options are completed by
    fillWithDefaultOptions() and printed before the run.

    @param jobs: jobs passed to ImmClassifierApp.run() as depend=
    @return: the jobs returned by ImmClassifierApp.run()
    """
    options = optTpl.copy()
    options.mode = "train"
    options.inpTrainSeq = pjoin(seqDbPath2, "custom_with_parent.fasta.gz")
    baseDir = options.cwd
    options.seqDb = pjoin(baseDir, "custom_with_parent.seqdb")
    options.taxaTreePkl = pjoin(baseDir, "custom_with_parent.tree.pkl")
    options.immDb = [pjoin(baseDir, "custom_with_parent.immdb")]
    options.trainMinLenSamp = 1
    options.stdout = "stdout.log"
    options.stderr = "stderr.log"
    ImmClassifierApp.fillWithDefaultOptions(options)
    print(options)
    app = ImmClassifierApp(opt=options)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(options)
    return newJobs
opt.immDb = pjoin(topWorkDir,"icm-%s" % refname) opt.workDir = pjoin(topWorkDir,"ph-gos-bac") #opt.predSeq = pjoin(topRndSeqDir,"query.5K.fna") #opt.predSeq = "/usr/local/projects/GOSII/shannon/Indian_Ocean_Viral/asm_combined_454_large/454LargeContigs.fna" opt.predSeq = pjoin(topWorkDir,"scaff-gos-vir","asm_combined_454_large.5K.fna") #opt.predSeq = pjoin(opt.workDir,"asm_combined_454_large.5K.rnd.fna") opt.predOutDir = pjoin(topPredDir,"asm_combined_454_large") #opt.predOutDir = pjoin(topWorkDir,"icm-%s-scale-score" % refname) opt.outDir = opt.predOutDir opt.rndScoreComb = pjoin(topWorkDir,"icm-%s-scale-score" % refname,"combined.score.pkl.gz") opt.nImmBatches = 200 opt.predMinLenSamp = 5000 for mode in ("proc-scores",): opt.mode = mode #"predict" "proc-scores" #"proc-scores-phymm" #"perf" #"proc-scores" app = ImmClassifierApp(opt=opt) jobs = app.run(depend=jobs) sys.exit(0) opt.cwd = opt.workDir opt.outScaleDir = pjoin(topWorkDir,"icm-%s-scale" % refname) opt.outScoreDir = pjoin(topWorkDir,"icm-%s-scale-score" % refname) for mode in ("score",): #generate score opt.mode = mode app = ImmScalingApp(opt=opt) jobs = app.run(depend=jobs) elif stage == "gos": opt = Struct()