Ejemplo n.º 1
0
def procScoresRefAgainstRef(jobs):
    """Run ImmClassifierApp in "proc-scores" mode using paths under <opt.cwd>/results.

    Options start as a copy of the module-level ``optTpl`` template. The
    combined score file is ``<opt.cwd>/results/combined.score`` and the
    prediction output directory is ``<opt.cwd>/results``.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        The ``jobs`` object that was passed in, unchanged.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "proc-scores"
    opt.outScoreComb = pjoin(workRoot, "results", "combined.score")
    opt.predOutDir = pjoin(workRoot, "results")

    # NOTE(review): the value returned by run() is dropped here, so the caller
    # gets back its own `jobs` -- confirm this is intended.
    ImmClassifierApp(opt=opt).run(depend=jobs)
    run_makeflow_if(opt)
    return jobs
Ejemplo n.º 2
0
    def predict(self):
        """Schedule an ImmClassifierApp "predict" run for each sample's Velvet contigs.

        For every entry of ``self.sampSubDirs`` the method:
          1. creates a per-sample work directory under ``self.topPredDir``
             and changes into it;
          2. writes ``pred_inp.fna`` from ``velvet/contigs.fa.gz`` through
             ``filterFastaByLength`` (minLen=300, lineLen=1000);
          3. writes ``samp.attr.csv`` through ``contigReadCountVelvet``;
          4. builds a fresh option set and calls ``ImmClassifierApp.run()``.

        The working directory is set to ``self.topWorkDir`` after each
        sample, whether or not processing raised.

        Returns:
            A list with the concatenated results of every ``run()`` call.
        """
        topSampDir = "/usr/local/projects/GOS3/Tier2/sequencing_technology_comparison/assembly_comparison"
        topAsmDir = topSampDir
        # Assembly parameters forwarded to contigReadCountVelvet.
        asmKmerSize = 31
        asmReadLen = 100

        sampDirs = [ pjoin(topAsmDir,l) for l in self.sampSubDirs ]

        makedir(self.topPredDir)

        jobsFin = []

        for sampDir in sampDirs:
            d = os.path.basename(sampDir)
            workDir = pjoin(self.topPredDir,d)
            makedir(workDir)
            try:
                os.chdir(workDir)
                asmDir = pjoin(sampDir,"velvet")
                inpFastaOrig = pjoin(asmDir,"contigs.fa.gz")
                inpFastaPred = pjoin(workDir,"pred_inp.fna")
                filterFastaByLength(inpFastaOrig,inpFastaPred,
                        minLen=300,lineLen=1000)

                sampAttr = pjoin(workDir,"samp.attr.csv")
                outCnt = open(sampAttr,"w")
                try:
                    contigReadCountVelvet(contFasta=inpFastaPred,kmerSize=asmKmerSize,readLen=asmReadLen,out=outCnt)
                finally:
                    # Close the attribute file even if the read-count step
                    # raises, so the handle is not leaked.
                    outCnt.close()

                opt = Struct()
                opt.runMode = "batchDep" #"inproc"
                opt.inpSeq = inpFastaPred
                opt.predMinLenSamp = 300
                opt.sampAttrib = sampAttr
                opt.predOutDir = pjoin(workDir,"results")
                opt.lrmUserOptions = '-P 9223'
                opt.mode = "predict" #"export-predictions"
                ImmClassifierApp.fillWithDefaultOptions(opt)
                # Each sample starts with an empty dependency list.
                jobs = []
                app = ImmClassifierApp(opt=opt)
                jobs = app.run(depend=jobs)

                jobsFin += jobs
            finally:
                os.chdir(self.topWorkDir)
        return jobsFin
Ejemplo n.º 3
0
def scoreRefAgainstRef(jobs):
    """Run ImmClassifierApp in "score" mode on 195.fasta.gz against <opt.cwd>/imm.

    Options start as a copy of the module-level ``optTpl`` template. The
    input sequence is taken from ``seqDbPath1`` and the combined score is
    written to ``<opt.cwd>/results/combined.score``.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        The ``jobs`` object that was passed in, unchanged.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "score"
    opt.immDb = [pjoin(workRoot, "imm")]
    opt.inpSeq = pjoin(seqDbPath1, "195.fasta.gz")
    opt.outScoreComb = pjoin(workRoot, "results", "combined.score")

    # NOTE(review): the value returned by run() is dropped here, so the caller
    # gets back its own `jobs` -- confirm this is intended.
    ImmClassifierApp(opt=opt).run(depend=jobs)
    run_makeflow_if(opt)
    return jobs
Ejemplo n.º 4
0
def trainRef(jobs):
    """Run ImmClassifierApp in "train" mode with <opt.cwd>/seqdb and <opt.cwd>/imm.

    Options start as a copy of the module-level ``optTpl`` template, are
    completed by ``ImmClassifierApp.fillWithDefaultOptions`` and are printed
    before the application is run.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "train"
    opt.immDb = [pjoin(workRoot, "imm")]
    opt.seqDb = pjoin(workRoot, "seqdb")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 5
0
def procScoresCustomAgainstJoint(jobs):
    """Run ImmClassifierApp in "proc-scores" mode on the 92830.1.join score file.

    Options start as a copy of the module-level ``optTpl`` template. The
    combined score file is ``<opt.cwd>/92830.1.join.combined.score`` and
    predictions go to ``<opt.cwd>/92830.1.join.results``. Remaining options
    are filled in by ``ImmClassifierApp.fillWithDefaultOptions`` and the
    final option set is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "proc-scores"
    opt.outScoreComb = pjoin(workRoot, "92830.1.join.combined.score")
    opt.predOutDir = pjoin(workRoot, "92830.1.join.results")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 6
0
def scoreCustomWithParentAgainstJoint(jobs):
    """Run ImmClassifierApp in "score" mode against two IMM databases at once.

    Options start as a copy of the module-level ``optTpl`` template.
    ``opt.immDb`` lists both ``<opt.cwd>/imm`` and
    ``<opt.cwd>/custom_with_parent.immdb``; the input sequence is
    ``custom_with_parent.fasta.gz`` under ``seqDbPath2``. Remaining options
    are filled in by ``ImmClassifierApp.fillWithDefaultOptions`` and the
    final option set is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "score"
    opt.immDb = [
        pjoin(workRoot, "imm"),
        pjoin(workRoot, "custom_with_parent.immdb"),
    ]
    opt.inpSeq = pjoin(seqDbPath2, "custom_with_parent.fasta.gz")
    opt.outScoreComb = pjoin(workRoot, "custom_with_parent.join.combined.score")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 7
0
def makeSeqDbCustom(jobs):
    """Run ImmClassifierApp in "make-ref-seqdb" mode to create <opt.cwd>/92830.seqdb.

    Options start as a copy of the module-level ``optTpl`` template. The
    training sequences and the model description (both under ``seqDbPath2``)
    are declared with input format "generic". Remaining options are filled
    in by ``ImmClassifierApp.fillWithDefaultOptions`` and the final option
    set is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    opt.mode = "make-ref-seqdb"
    opt.inpTrainSeq = pjoin(seqDbPath2, "generic.mod.train.fasta.gz")
    opt.inpTrainModelDescr = pjoin(seqDbPath2, "generic.mod.train.json")
    opt.inpTrainSeqFormat = "generic"
    opt.seqDb = pjoin(opt.cwd, "92830.seqdb")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 8
0
    def predict(self):
        """Schedule an ImmClassifierApp "predict" run for each sample's 454 contigs.

        For every entry of ``self.sampSubDirs`` the method:
          1. creates a per-sample work directory under ``self.topPredDir``
             and changes into it;
          2. writes ``samp.attr.csv`` through ``contigReadCount454``;
          3. builds a fresh option set pointing at ``454AllContigs.fna`` in
             the sample's assembly directory and at the reference ICM
             database under ``$GOSII_WORK/icm-refseq``;
          4. calls ``ImmClassifierApp.run()`` once per listed mode.

        The working directory is set to ``self.topWorkDir`` after each
        sample, whether or not processing raised.

        Returns:
            A list with the concatenated results of the last ``run()`` call
            made for each sample.

        Raises:
            KeyError: if the ``GOSII_WORK`` environment variable is not set.
        """
        topSampDir = "/usr/local/depot/projects/GOS/baltic"
        topAsmDir = pjoin(topSampDir,"assembly")

        sampDirs = [ pjoin(topAsmDir,l) for l in self.sampSubDirs ]

        makedir(self.topPredDir)

        jobsFin = []

        icmDbRef = pjoin(os.environ["GOSII_WORK"],"icm-refseq")
        for sampDir in sampDirs:
            d = os.path.basename(sampDir)
            workDir = pjoin(self.topPredDir,d)
            makedir(workDir)
            try:
                os.chdir(workDir)
                sampAttr = pjoin(workDir,"samp.attr.csv")
                outCnt = open(sampAttr,"w")
                try:
                    contigReadCount454(asmDir=sampDir,out=outCnt)
                finally:
                    # Close the attribute file even if the read-count step
                    # raises, so the handle is not leaked.
                    outCnt.close()

                opt = Struct()
                opt.runMode = "batchDep" #"inproc"
                opt.immDb = icmDbRef
                opt.inpSeq = pjoin(sampDir,"454AllContigs.fna")
                opt.sampAttrib = sampAttr
                opt.predMinLenSamp = 1000

                # Each sample starts with an empty dependency list; every
                # mode then depends on the result of the previous one.
                jobs = []

                for mode in ("predict",):
                    opt.mode = mode #"predict" "proc-scores" #"proc-scores-phymm" #"perf" #"proc-scores"
                    app = ImmClassifierApp(opt=opt)
                    jobs = app.run(depend=jobs)
                jobsFin += jobs
            finally:
                # Use the instance attribute rather than a bare `topWorkDir`
                # name, which is not defined inside this method.
                os.chdir(self.topWorkDir)
        return jobsFin
Ejemplo n.º 9
0
def makeSeqDbRef(jobs):
    """Run ImmClassifierApp in "make-ref-seqdb" mode to create <opt.cwd>/seqdb.

    Options start as a copy of the module-level ``optTpl`` template. The
    training input is the glob pattern ``*.fasta.gz`` under ``seqDbPath1``,
    declared with input format "ncbi". ``opt.immDb`` is set to
    ``[<opt.cwd>/imm]``. Remaining options are filled in by
    ``ImmClassifierApp.fillWithDefaultOptions`` and the final option set is
    printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "make-ref-seqdb"
    opt.inpTrainSeq = pjoin(seqDbPath1, "*.fasta.gz")
    opt.inpTrainSeqFormat = "ncbi"

    opt.immDb = [pjoin(workRoot, "imm")]
    opt.seqDb = pjoin(workRoot, "seqdb")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 10
0
def procScoresRefAgainstCustom(jobs,inpIsSeqDb=False):
    """Run ImmClassifierApp in "proc-scores" mode on <opt.cwd>/92830.combined.score.

    Options start as a copy of the module-level ``optTpl`` template.
    Predictions go to ``<opt.cwd>/92830.results``. Remaining options are
    filled in by ``ImmClassifierApp.fillWithDefaultOptions`` and the final
    option set is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.
        inpIsSeqDb: When true, ``opt.sampAttrib`` is set to None; otherwise
            it points at ``195.immClassifier.attrib.csv`` under
            ``seqDbPath1``.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "proc-scores"
    opt.outScoreComb = pjoin(workRoot, "92830.combined.score")
    opt.predOutDir = pjoin(workRoot, "92830.results")
    opt.sampAttrib = (
        None if inpIsSeqDb
        else pjoin(seqDbPath1, "195.immClassifier.attrib.csv")
    )

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 11
0
def scoreRefAgainstCustom(jobs,inpIsSeqDb=False):
    """Run ImmClassifierApp in "score" mode against <opt.cwd>/92830.immdb.

    Options start as a copy of the module-level ``optTpl`` template. The
    combined score is written to ``<opt.cwd>/92830.combined.score``.
    Remaining options are filled in by
    ``ImmClassifierApp.fillWithDefaultOptions`` and the final option set is
    printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.
        inpIsSeqDb: When true, the input is ``<opt.cwd>/92830.seqdb``;
            otherwise it is ``195.fasta.gz`` under ``seqDbPath1``.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "score"
    opt.immDb = [pjoin(workRoot, "92830.immdb")]
    opt.inpSeq = (
        pjoin(workRoot, "92830.seqdb") if inpIsSeqDb
        else pjoin(seqDbPath1, "195.fasta.gz")
    )
    opt.outScoreComb = pjoin(workRoot, "92830.combined.score")

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 12
0
def trainCustom(jobs):
    """Run ImmClassifierApp in "train" mode from 92830.seqdb into 92830.immdb.

    Options start as a copy of the module-level ``optTpl`` template.
    ``opt.trainMinLenSamp`` is set to 1 and ``opt.stdout`` / ``opt.stderr``
    are set to "stdout.log" / "stderr.log". Remaining options are filled in
    by ``ImmClassifierApp.fillWithDefaultOptions`` and the final option set
    is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "train"
    opt.seqDb = pjoin(workRoot, "92830.seqdb")
    opt.immDb = [pjoin(workRoot, "92830.immdb")]
    opt.trainMinLenSamp = 1

    opt.stdout = "stdout.log"
    opt.stderr = "stderr.log"

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 13
0
def trainCustomWithParent(jobs):
    """Run ImmClassifierApp in "train" mode for the custom_with_parent data set.

    Options start as a copy of the module-level ``optTpl`` template. The
    training sequences come from ``custom_with_parent.fasta.gz`` under
    ``seqDbPath2``; the sequence DB, taxonomy tree pickle and IMM DB paths
    all live under ``opt.cwd`` with a ``custom_with_parent`` prefix.
    ``opt.trainMinLenSamp`` is set to 1 and ``opt.stdout`` / ``opt.stderr``
    are set to "stdout.log" / "stderr.log". Remaining options are filled in
    by ``ImmClassifierApp.fillWithDefaultOptions`` and the final option set
    is printed.

    Args:
        jobs: Passed to ``ImmClassifierApp.run()`` as its ``depend`` argument.

    Returns:
        Whatever ``ImmClassifierApp.run()`` returned.
    """
    opt = optTpl.copy()
    workRoot = opt.cwd
    opt.mode = "train"
    opt.inpTrainSeq = pjoin(seqDbPath2, "custom_with_parent.fasta.gz")
    opt.seqDb = pjoin(workRoot, "custom_with_parent.seqdb")
    opt.taxaTreePkl = pjoin(workRoot, "custom_with_parent.tree.pkl")
    opt.immDb = [pjoin(workRoot, "custom_with_parent.immdb")]
    opt.trainMinLenSamp = 1

    opt.stdout = "stdout.log"
    opt.stderr = "stderr.log"

    ImmClassifierApp.fillWithDefaultOptions(opt)

    # Single-argument parenthesised form prints the same text as `print opt`.
    print(opt)

    app = ImmClassifierApp(opt=opt)
    newJobs = app.run(depend=jobs)
    run_makeflow_if(opt)
    return newJobs
Ejemplo n.º 14
0
    # Paths for this stage. topWorkDir, topPredDir, refname, opt and jobs are
    # all defined before this excerpt.
    opt.immDb = pjoin(topWorkDir,"icm-%s" % refname)
    opt.workDir = pjoin(topWorkDir,"ph-gos-bac")
    #opt.predSeq = pjoin(topRndSeqDir,"query.5K.fna")
    #opt.predSeq = "/usr/local/projects/GOSII/shannon/Indian_Ocean_Viral/asm_combined_454_large/454LargeContigs.fna"
    opt.predSeq = pjoin(topWorkDir,"scaff-gos-vir","asm_combined_454_large.5K.fna")
    #opt.predSeq = pjoin(opt.workDir,"asm_combined_454_large.5K.rnd.fna")
    opt.predOutDir = pjoin(topPredDir,"asm_combined_454_large")
    #opt.predOutDir = pjoin(topWorkDir,"icm-%s-scale-score" % refname)
    # outDir is deliberately the same directory as predOutDir.
    opt.outDir = opt.predOutDir
    opt.rndScoreComb = pjoin(topWorkDir,"icm-%s-scale-score" % refname,"combined.score.pkl.gz")
    opt.nImmBatches = 200
    opt.predMinLenSamp = 5000

    # Run ImmClassifierApp once per listed mode, each run depending on the
    # jobs returned by the previous one. Only "proc-scores" is enabled.
    for mode in ("proc-scores",):
        opt.mode = mode #"predict" "proc-scores" #"proc-scores-phymm" #"perf" #"proc-scores"
        app = ImmClassifierApp(opt=opt)
        jobs = app.run(depend=jobs)
    # NOTE(review): unconditional exit -- every statement below in this branch
    # is unreachable. Presumably a temporary short-circuit; confirm before
    # relying on the ImmScalingApp step.
    sys.exit(0)
    opt.cwd = opt.workDir
    opt.outScaleDir = pjoin(topWorkDir,"icm-%s-scale" % refname)
    opt.outScoreDir = pjoin(topWorkDir,"icm-%s-scale-score" % refname)
    
    for mode in ("score",): #generate score
        opt.mode = mode
        app = ImmScalingApp(opt=opt)
        jobs = app.run(depend=jobs)

elif stage == "gos":

    opt = Struct()