예제 #1
0
    def varscan2(self):
        """Call somatic variants for a tumor/normal pair with VarScan2.

        Pipes a joint ``samtools mpileup`` of ``<output>/bam/<pair>.bam`` and
        ``<output>/bam/<sample>.bam`` into ``VarScan somatic`` and then runs
        ``bcftools reheader`` with the reference ``.fai`` on the resulting
        ``.indel.vcf`` and ``.snp.vcf``.  All files are written to
        ``<output>/tempFile/varscan2_<sample>``.

        Prints a message and exits the interpreter when no paired sample is
        configured.
        """
        pairID = self.pair
        # Check the precondition before touching the file system so that a
        # rejected run does not leave an empty work directory behind.
        if pairID is None:
            print("varscan2仅适用于配对分析,请指定配对样本并重新运行")
            exit()

        reference = self.reference
        resultsDir = self.output
        sampleID = self.sample
        MAF = self.MAF
        # Coverage threshold passed to VarScan: the depth cutoff scaled by the
        # allele-frequency cutoff, truncated to an integer.
        minCov = str(int(int(self.filtDP) * float(MAF)))

        tmpDir = resultsDir + "/tempFile/varscan2_" + sampleID
        mkdir(tmpDir)
        vs = "/home/bioinfo/ubuntu/software/VarScan.v2.3.9/VarScan.v2.3.9.jar"

        cmd = """
            samtools mpileup -B -f {reference} -q 15 -d 10000 \\
                {resultsDir}/bam/{pairID}.bam {resultsDir}/bam/{sampleID}.bam \\
                | java -jar {vs} somatic -mpileup {tmpDir}/{sampleID} \\
                --min-coverage-normal {minCov} --min-coverage-tumor {minCov} \\
                --min-var-freq {MAF} --strand-filter 1 --output-vcf
            bcftools reheader -f {reference}.fai {tmpDir}/{sampleID}.indel.vcf -o {tmpDir}/{sampleID}.indel.fix.vcf
            bcftools reheader -f {reference}.fai {tmpDir}/{sampleID}.snp.vcf -o {tmpDir}/{sampleID}.snp.fix.vcf
        """.format(reference=reference, resultsDir=resultsDir, pairID=pairID, sampleID=sampleID, vs=vs, tmpDir=tmpDir, minCov=minCov, MAF=MAF)
        print(cmd)
        os.system(cmd)
예제 #2
0
    def annovar(self):
        """Annotate the sample's variants with snpEff followed by ANNOVAR.

        Calls ``self.snpeff()``, converts
        ``<output>/annotation/<sample>.snpeff.vcf`` to avinput, runs
        ``table_annovar.pl`` with a fixed protocol list, and copies the
        resulting ``*_multianno.txt`` into ``<output>/annotation/``.
        """
        annotationCfg = self.runningInfo["setting"]["Annotation"]
        fields = {
            "humandb": annotationCfg["humandb"],
            "buildver": annotationCfg["buildver"],
            "resultsDir": self.output,
            "sampleID": self.sample,
            "threads": self.threads,
        }
        fields["tmpDir"] = fields["resultsDir"] + "/tempFile/annovar_" + fields["sampleID"]
        mkdir(fields["tmpDir"])

        # The conversion step below reads the snpEff-annotated VCF.
        self.snpeff()

        template = """
            convert2annovar.pl -format vcf4 \\
                {resultsDir}/annotation/{sampleID}.snpeff.vcf \\
                --includeinfo > {tmpDir}/{sampleID}.avinput
            table_annovar.pl {tmpDir}/{sampleID}.avinput \\
                {humandb} -buildver {buildver} \\
                -out {tmpDir}/{sampleID} -remove \\
                -protocol refGene,avsnp150,gnomad211_genome,clinvar_20210308,JaxCkb,Civic,OncoKB,dbnsfp41a,cosmic92_coding,intervar_20180118 \\
                -operation g,f,f,f,f,f,f,f,f,f \\
                -nastring - -thread {threads} -otherinfo
            cp {tmpDir}/{sampleID}.{buildver}_multianno.txt {resultsDir}/annotation/
        """
        cmd = template.format(**fields)
        print(cmd)
        os.system(cmd)
예제 #3
0
파일: msi.py 프로젝트: pzweuj/DNApipeline
    def msisensor_pro(self):
        """Run MSIsensor-pro on the sample and publish its result table.

        Tumor-only runs (``self.pair is None``) use ``msisensor-pro pro``;
        paired runs use ``msisensor-pro msi`` with the paired BAM as normal.
        The tool's output file is renamed to ``<sample>.txt`` inside the work
        directory and copied to ``<output>/msi/<sample>.MSIsensorp.txt``.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        otherCfg = self.runningInfo["setting"]["Other"]
        msi_baseline = otherCfg["msisensorpro_baseline"]
        msi_list = otherCfg["msi_list"]

        tmpDir = resultsDir + "/tempFile/msisensorpro_" + sampleID
        mkdir(tmpDir)

        if pairID is None:
            # ``pro`` scores a tumor against a pre-built baseline.  The
            # configured baseline used to be read but never passed to the
            # tool; fall back to the plain site list only when no baseline
            # is configured.
            cmd = """
                msisensor-pro pro -d {site_file} \\
                    -t {resultsDir}/bam/{sampleID}.bam \\
                    -o {tmpDir}/{sampleID}
                mv {tmpDir}/{sampleID} {tmpDir}/{sampleID}.txt
                cp {tmpDir}/{sampleID}.txt {resultsDir}/msi/{sampleID}.MSIsensorp.txt
            """.format(site_file=msi_baseline or msi_list, resultsDir=resultsDir, sampleID=sampleID, tmpDir=tmpDir)
        else:
            cmd = """
                msisensor-pro msi -d {msi_list} \\
                    -n {resultsDir}/bam/{pairID}.bam \\
                    -t {resultsDir}/bam/{sampleID}.bam \\
                    -o {tmpDir}/{sampleID}
                mv {tmpDir}/{sampleID} {tmpDir}/{sampleID}.txt
                cp {tmpDir}/{sampleID}.txt {resultsDir}/msi/{sampleID}.MSIsensorp.txt
            """.format(msi_list=msi_list, resultsDir=resultsDir, pairID=pairID, sampleID=sampleID, tmpDir=tmpDir)
        print(cmd)
        os.system(cmd)
예제 #4
0
    def bcftools(self):
        """Call variants on the sample BAM with ``bcftools mpileup | call``.

        Writes ``<output>/tempFile/bcftools_<sample>/<sample>.bcftools.vcf``.
        When a BED file is configured the pileup is restricted to it.
        """
        reference = self.reference
        resultsDir = self.output
        sampleID = self.sample
        bedFile = self.bed
        threads = self.threads

        tmpDir = resultsDir + "/tempFile/bcftools_" + sampleID
        mkdir(tmpDir)

        # The region restriction is applied to mpileup, which reads the BAM
        # directly; ``call`` reads a stream from stdin here.
        regionOpt = "" if bedFile is None else "-R {}".format(bedFile)

        # ``{threads}`` used to be missing from the format arguments
        # (KeyError), and ``-t`` on ``bcftools call`` selects targets, not
        # threads, so ``--threads`` is used instead.
        cmd = """
            bcftools mpileup -f {reference} {regionOpt} \\
                {resultsDir}/bam/{sampleID}.bam \\
                | bcftools call -mv -O v \\
                --threads {threads} \\
                -o {tmpDir}/{sampleID}.bcftools.vcf
        """.format(reference=reference,
                   regionOpt=regionOpt,
                   resultsDir=resultsDir,
                   sampleID=sampleID,
                   threads=threads,
                   tmpDir=tmpDir)
        print(cmd)
        os.system(cmd)
예제 #5
0
    def hlahd(self):
        """Run HLA-HD on the extracted HLA reads and publish the final result.

        Sets ``self.tmpDir`` to ``<output>/tempFile/hlahd_<sample>``, calls
        ``self.extractHLA()``, runs ``hlahd.sh`` on the ``*.HLA.R1/R2.fastq``
        files in that directory and copies ``<id>_final.result.txt`` to
        ``<output>/HLA/<id>.hlahd.txt``.  ``<id>`` is the paired sample when
        one is configured, otherwise the sample itself.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        threads = self.threads

        # These databases do not depend on the reference genome build, so they
        # are hard-coded here instead of being read from the config file.
        freq = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/freq_data"
        dictionary = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/dictionary"
        split_file = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/HLA_gene.ABC.txt"

        # The work directory is always named after the primary sample.
        tmpDir = resultsDir + "/tempFile/hlahd_" + sampleID
        self.tmpDir = tmpDir
        mkdir(tmpDir)
        # NOTE(review): extractHLA presumably writes the HLA fastq files into
        # self.tmpDir - confirm against its definition.
        self.extractHLA()

        # With a paired sample configured, typing runs on that sample's reads.
        if pairID is not None:
            sampleID = pairID
        cmd = """
            hlahd.sh -t {threads} -m 100 -c 0.95 -f {freq} \\
                {tmpDir}/{sampleID}.HLA.R1.fastq {tmpDir}/{sampleID}.HLA.R2.fastq \\
                {split_file} {dictionary} {sampleID} {tmpDir}
            cp {tmpDir}/{sampleID}/result/{sampleID}_final.result.txt {resultsDir}/HLA/{sampleID}.hlahd.txt
        """.format(threads=threads,
                   freq=freq,
                   split_file=split_file,
                   dictionary=dictionary,
                   resultsDir=resultsDir,
                   sampleID=sampleID,
                   tmpDir=tmpDir)
        print(cmd)
        os.system(cmd)
예제 #6
0
    def optitype(self):
        """Run OptiType on the extracted HLA reads and publish the result.

        Sets ``self.tmpDir`` to ``<output>/tempFile/optitype_<sample>``, calls
        ``self.extractHLA()``, runs ``OptiTypePipeline.py`` on the
        ``*.HLA.R1/R2.fastq`` files there and copies ``<id>_result.tsv`` to
        ``<output>/HLA/<id>.optitype.txt``.  ``<id>`` is the paired sample
        when one is configured, otherwise the sample itself.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        optipipe = "/home/bioinfo/ubuntu/software/OptiType-1.3.5/OptiTypePipeline.py"

        # The work directory is always named after the primary sample.
        tmpDir = resultsDir + "/tempFile/optitype_" + sampleID
        self.tmpDir = tmpDir
        mkdir(tmpDir)
        # NOTE(review): extractHLA presumably writes the HLA fastq files into
        # self.tmpDir - confirm against its definition.
        self.extractHLA()

        # With a paired sample configured, typing runs on that sample's reads.
        if pairID is not None:
            sampleID = pairID
        cmd = """
            python {optipipe} \\
                -i {tmpDir}/{sampleID}.HLA.R1.fastq {tmpDir}/{sampleID}.HLA.R2.fastq \\
                -d -o {tmpDir} -p {sampleID} -v
            cp {tmpDir}/{sampleID}_result.tsv {resultsDir}/HLA/{sampleID}.optitype.txt
        """.format(optipipe=optipipe,
                   tmpDir=tmpDir,
                   sampleID=sampleID,
                   resultsDir=resultsDir)
        print(cmd)
        os.system(cmd)
예제 #7
0
    def seq2hla(self):
        """Run seq2HLA on the extracted HLA reads and publish the class I call.

        Sets ``self.tmpDir`` to ``<output>/tempFile/seq2hla_<sample>``, calls
        ``self.extractHLA()``, runs ``seq2HLA.py`` on the
        ``*.HLA.R1/R2.fastq`` files there and copies
        ``<id>-ClassI-class.HLAgenotype4digits`` to
        ``<output>/HLA/<id>.seq2HLA.txt``.  ``<id>`` is the paired sample when
        one is configured, otherwise the sample itself.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        threads = self.threads
        s2h = "/home/bioinfo/ubuntu/software/seq2HLA/seq2HLA.py"

        # The work directory is always named after the primary sample.
        tmpDir = resultsDir + "/tempFile/seq2hla_" + sampleID
        self.tmpDir = tmpDir
        mkdir(tmpDir)
        # NOTE(review): extractHLA presumably writes the HLA fastq files into
        # self.tmpDir - confirm against its definition.
        self.extractHLA()

        # With a paired sample configured, typing runs on that sample's reads.
        if pairID is not None:
            sampleID = pairID
        cmd = """
            python {s2h} \\
                -1 {tmpDir}/{sampleID}.HLA.R1.fastq \\
                -2 {tmpDir}/{sampleID}.HLA.R2.fastq \\
                -r {tmpDir}/{sampleID} -p {threads}
            cp {tmpDir}/{sampleID}-ClassI-class.HLAgenotype4digits {resultsDir}/HLA/{sampleID}.seq2HLA.txt
        """.format(s2h=s2h,
                   tmpDir=tmpDir,
                   sampleID=sampleID,
                   threads=threads,
                   resultsDir=resultsDir)
        print(cmd)
        os.system(cmd)


# end
예제 #8
0
    def snpeff(self):
        """Annotate the sample VCF with snpEff.

        The input is ``<output>/vcf/<sample>.vcf`` for unpaired runs and
        ``<output>/vcf/<sample>.filter.vcf`` when a paired sample is
        configured.  The annotated VCF is written to
        ``<output>/tempFile/snpeff_<sample>`` and copied to
        ``<output>/annotation/``.
        """
        buildver = self.runningInfo["setting"]["Annotation"]["buildver"]
        resultsDir = self.output
        sampleID = self.sample

        suffix = ".vcf" if self.pair is None else ".filter.vcf"
        vcfFile = resultsDir + "/vcf/" + sampleID + suffix

        tmpDir = resultsDir + "/tempFile/snpeff_" + sampleID
        mkdir(tmpDir)
        cmd = """
            java -jar /home/bioinfo/ubuntu/software/snpEff/snpEff.jar \\
                -c /home/bioinfo/ubuntu/software/snpEff/snpEff.config \\
                -s {tmpDir}/{sampleID}.summary.html \\
                {buildver} {vcfFile} > {tmpDir}/{sampleID}.snpeff.vcf
            cp {tmpDir}/{sampleID}.snpeff.vcf {resultsDir}/annotation/
        """.format(buildver=buildver,
                   vcfFile=vcfFile,
                   tmpDir=tmpDir,
                   sampleID=sampleID,
                   resultsDir=resultsDir)
        print(cmd)
        os.system(cmd)
예제 #9
0
    def neopredpipe(self):
        """Predict neoantigens for the sample with NeoPredPipe.

        Copies ``<output>/vcf/<sample>.filter.vcf`` into a private ``vcf``
        folder under ``<output>/tempFile/Neoantigen_<sample>``, calls
        ``self.extractHLAResults()``, runs ``NeoPredPipe.py`` with
        ``<sample>.hlas`` as the HLA file, prepends a header line to the
        neoantigen table with ``sed`` and copies the resulting ``.tsv`` to
        ``<output>/Neoantigen/``.
        """
        sampleID = self.sample
        resultsDir = self.output
        NeoPred = "/home/bioinfo/ubuntu/software/NeoPredPipe-1.1/NeoPredPipe.py"

        tmpDir = resultsDir + "/tempFile/Neoantigen_" + sampleID
        mkdir(tmpDir)
        vcf = resultsDir + "/vcf/" + sampleID + ".filter.vcf"
        vcfDir = tmpDir + "/vcf"
        mkdir(vcfDir)
        # NeoPredPipe is pointed at a directory (-I), so the filtered VCF is
        # staged alone in its own folder under the plain <sample>.vcf name.
        shutil.copy(vcf, vcfDir + "/" + sampleID + ".vcf")
        # NOTE(review): presumably produces {tmpDir}/{sampleID}.hlas consumed
        # by -H below - confirm against extractHLAResults.
        self.extractHLAResults()

        cmd = """
            python {NeoPred} \\
                -I {tmpDir}/vcf \\
                -H {tmpDir}/{sampleID}.hlas \\
                -o {tmpDir} \\
                -n {sampleID} \\
                -c 0
            sed '1iSample\\tR\\tLine\\tchrom\\tallelepos\\tref\\talt\\tGeneName\\tpos\\thla\\tpeptide\\tcore\\tOf\\tGp\\tGl\\tIp\\tIl\\tIcore\\tIdentity\\tScore_EL\\t%Rank_EL\\tScore_BA\\t%Rank_BA\\tAff(nM)\\tCandidate\\tBindLevel\\tNovelty' \\
                {tmpDir}/{sampleID}.neoantigens.txt > {tmpDir}/{sampleID}.neoantigens.tsv
            cp {tmpDir}/{sampleID}.neoantigens.tsv {resultsDir}/Neoantigen/
        """.format(NeoPred=NeoPred,
                   tmpDir=tmpDir,
                   sampleID=sampleID,
                   resultsDir=resultsDir)
        print(cmd)
        os.system(cmd)
예제 #10
0
    def pisces(self):
        r"""Call variants on the sample BAM with Pisces and filter the output.

        The genome folder must be indexed beforehand, e.g.::

            dotnet CreateGenomeSizeFile.dll \
                -g hg19/ \
                -s "Homo sapiens (UCSC hg19)" \
                -o hg19/

        Pisces writes into ``<output>/tempFile/pisces_<sample>``.  Afterwards
        ``bcftools view`` drops records with genotype ``0/0``, ``./.`` or
        ``0/.`` and records whose FILTER is ``LowDP``; the remainder is
        copied to ``<output>/vcf/<sample>.vcf``.
        """
        database = self.databases
        resultsDir = self.output
        sampleID = self.sample
        bedFile = self.bed
        threads = self.threads
        minDP = self.filtDP
        minMAF = self.MAF

        piscesBin = "/home/bioinfo/ubuntu/software/Pisces_5.2.10.49/Pisces.dll"

        tmpDir = resultsDir + "/tempFile/pisces_" + sampleID
        mkdir(tmpDir)

        # Restrict calling to the BED intervals only when one is configured.
        intervalOpt = "" if bedFile is None else "-i {}".format(bedFile)
        cmd = """
            dotnet {piscesBin} -b {resultsDir}/bam/{sampleID}.bam \\
                -g {database} \\
                -o {tmpDir} \\
                -t {threads} {intervalOpt} \\
                --mindp {minDP} \\
                --minvf {minMAF} \\
                --minvq 0 --threadbychr true
        """.format(intervalOpt=intervalOpt, minDP=minDP, minMAF=minMAF, piscesBin=piscesBin, resultsDir=resultsDir, sampleID=sampleID, database=database, tmpDir=tmpDir, threads=threads)
        print(cmd)
        os.system(cmd)

        filt = """
            bcftools view \\
                -e "GT='0/0' | GT='./.' | GT='0/.'" \\
                {tmpDir}/{sampleID}.genome.vcf > {tmpDir}/{sampleID}.muts.vcf
            bcftools view \\
                -e "FILTER='LowDP'" \\
                {tmpDir}/{sampleID}.muts.vcf > {tmpDir}/{sampleID}.pisces.vcf
            cp {tmpDir}/{sampleID}.pisces.vcf {resultsDir}/vcf/{sampleID}.vcf
        """.format(tmpDir=tmpDir, sampleID=sampleID, resultsDir=resultsDir)
        print(filt)
        os.system(filt)
예제 #11
0
파일: loh.py 프로젝트: pzweuj/DNApipeline
    def TempPurityPloidy(self):
        """Write a placeholder purity/ploidy table for the sample.

        Creates ``<output>/tempFile/LOH_<sample>/<sample>.solutions.txt``
        with a three-column header and a single row for the sample holding
        the fixed values ``2``, ``0.8`` and ``2``.
        """
        sampleID = self.sample
        resultsDir = self.output

        tmpDir = resultsDir + "/tempFile/LOH_" + sampleID
        mkdir(tmpDir)

        # The context manager closes the handle even if a write fails.
        with open(tmpDir + "/" + sampleID + ".solutions.txt", "w") as solution:
            # The data row has one more field than the header: it starts with
            # the sample name.
            solution.write("Ploidy\ttumorPurity\ttumorPloidy\n")
            solution.write(sampleID + "\t2\t0.8\t2\n")
예제 #12
0
    def manta(self):
        """Call structural variants with Manta.

        Clears ``<output>/tempFile/manta_<sample>``, configures Manta there
        in exome mode with evidence BAMs enabled, runs the generated
        workflow, and decompresses the result to ``<sample>.manta.vcf`` in
        the same folder.  Tumor-only runs read ``tumorSV.vcf.gz``; paired
        runs add ``self.normal`` as the normal BAM and read
        ``somaticSV.vcf.gz``.
        """
        manta = "/home/bioinfo/ubuntu/software/manta-1.6.0/bin/configManta.py"
        reference = self.reference
        tumorBam = self.bam
        resultsDir = self.output
        sampleID = self.sample
        threads = self.threads

        tmpDir = resultsDir + "/tempFile/manta_" + sampleID
        mkdir(tmpDir)

        # The two modes differ only in the normal BAM option and in which
        # result file is read afterwards.
        if self.pair is None:
            normalOpt = ""
            svVcf = "tumorSV.vcf.gz"
        else:
            normalOpt = "--normalBam {}".format(self.normal)
            svVcf = "somaticSV.vcf.gz"

        cmd = """
            rm -rf {tmpDir}/*
            {manta} \\
                --tumorBam {tumorBam} {normalOpt} \\
                --referenceFasta {reference} \\
                --exome \\
                --generateEvidenceBam \\
                --runDir {tmpDir}
            {tmpDir}/runWorkflow.py -j {threads}
            zcat {tmpDir}/results/variants/{svVcf} > {tmpDir}/{sampleID}.manta.vcf
        """.format(threads=threads,
                   sampleID=sampleID,
                   manta=manta,
                   normalOpt=normalOpt,
                   svVcf=svVcf,
                   tumorBam=tumorBam,
                   reference=reference,
                   tmpDir=tmpDir)

        print(cmd)
        os.system(cmd)
예제 #13
0
    def bwa_mem(self):
        """Align cleaned reads with ``bwa mem`` and publish sorted BAMs.

        For the sample, and for the paired sample when one is configured,
        reads ``<output>/cleandata/<id>.clean_R1/R2.fastq.gz``, aligns them,
        converts and sorts with sambamba inside
        ``<output>/tempFile/bwa_<id>``, and copies the sorted BAM and its
        index to ``<output>/bam/<id>.bam(.bai)``.
        """
        reference = self.reference
        threads = self.threads
        resultsDir = self.output

        # The sample is always aligned; the paired sample goes through the
        # identical steps afterwards when present.
        sampleIDs = [self.sample]
        if self.pair is not None:
            sampleIDs.append(self.pair)

        for sampleID in sampleIDs:
            tmpDir = resultsDir + "/tempFile/bwa_" + sampleID
            mkdir(tmpDir)
            tmp = tmpDir + "/tmp"
            mkdir(tmp)

            cmd = """
                bwa mem -t {threads} \\
                    -M \\
                    -R "@RG\\tID:{sampleID}\\tLB:{sampleID}\\tPL:illumina\\tPU:Hiseq\\tSM:{sampleID}" \\
                    {reference} \\
                    {resultsDir}/cleandata/{sampleID}.clean_R1.fastq.gz \\
                    {resultsDir}/cleandata/{sampleID}.clean_R2.fastq.gz \\
                    | sambamba view -f bam -t {threads} -S /dev/stdin > {tmpDir}/{sampleID}.bam
                sambamba sort {tmpDir}/{sampleID}.bam -t {threads} -o {tmpDir}/{sampleID}.sort.bam --tmpdir {tmp} -p
                rm {tmpDir}/{sampleID}.bam
                cp {tmpDir}/{sampleID}.sort.bam {resultsDir}/bam/{sampleID}.bam
                cp {tmpDir}/{sampleID}.sort.bam.bai {resultsDir}/bam/{sampleID}.bam.bai
                rm -rf {tmp}
            """.format(tmpDir=tmpDir,
                       threads=threads,
                       sampleID=sampleID,
                       reference=reference,
                       resultsDir=resultsDir,
                       tmp=tmp)
            print(cmd)
            os.system(cmd)
예제 #14
0
파일: msi.py 프로젝트: pzweuj/DNApipeline
    def msisensor_ct(self):
        """Run ``msisensor-ct msi`` on the sample BAM.

        The model path comes from ``setting.Other.MSIsensor_ct`` in the run
        configuration; output goes to
        ``<output>/tempFile/msisensorct_<sample>``.
        """
        outDir = self.output
        sample = self.sample
        nThreads = self.threads
        model = self.runningInfo["setting"]["Other"]["MSIsensor_ct"]

        workDir = outDir + "/tempFile/msisensorct_" + sample
        mkdir(workDir)

        template = """
            msisensor-ct msi -D -M {msi_model} -t {resultsDir}/bam/{sampleID}.bam \\
                -o {tmpDir} -b {threads}
        """
        cmd = template.format(
            msi_model=model,
            resultsDir=outDir,
            sampleID=sample,
            tmpDir=workDir,
            threads=nThreads,
        )
        print(cmd)
        os.system(cmd)
예제 #15
0
    def markDuplicates(self):
        """Mark (or remove) duplicate reads with ``sambamba markdup``.

        For the sample, and for the paired sample when one is configured,
        processes ``<output>/bam/<id>.bam`` inside
        ``<output>/tempFile/markDups_<id>`` and then overwrites the BAM and
        its index under ``<output>/bam`` with the marked version.  Duplicates
        are removed instead of only flagged when ``self.removeDups`` is true.
        """
        resultsDir = self.output
        threads = self.threads
        remove = "-r" if self.removeDups else ""

        # The sample is always processed; the paired sample goes through the
        # identical steps afterwards when present.
        sampleIDs = [self.sample]
        if self.pair is not None:
            sampleIDs.append(self.pair)

        for sampleID in sampleIDs:
            tmpDir = resultsDir + "/tempFile/markDups_" + sampleID
            mkdir(tmpDir)
            tmp = tmpDir + "/tmp"
            mkdir(tmp)

            cmd = """
                sambamba markdup \\
                    {resultsDir}/bam/{sampleID}.bam \\
                    {tmpDir}/{sampleID}.marked.bam \\
                    -p --overflow-list-size 600000 \\
                    --tmpdir {tmp} \\
                    -t {threads} {remove}
                rm -rf {tmp}
                cp {tmpDir}/{sampleID}.marked.bam {resultsDir}/bam/{sampleID}.bam
                cp {tmpDir}/{sampleID}.marked.bam.bai {resultsDir}/bam/{sampleID}.bam.bai
            """.format(threads=threads,
                       tmpDir=tmpDir,
                       resultsDir=resultsDir,
                       sampleID=sampleID,
                       remove=remove,
                       tmp=tmp)
            print(cmd)
            os.system(cmd)
예제 #16
0
    def varscan_filter(self):
        """Merge the VarScan2 indel and SNP VCFs into one sorted VCF.

        Reads ``<sample>.indel.fix.vcf`` and ``<sample>.snp.fix.vcf`` from
        ``<output>/tempFile/varscan2_<sample>``.  The header is taken from
        the indel file only, with the ``NORMAL``/``TUMOR`` column names
        replaced by the sample and pair IDs; in every record the two sample
        columns (fields 10 and 11) are swapped.  The merged file is sorted
        with ``bcftools sort`` and copied to
        ``<output>/vcf/<sample>_<pair>.vcf``.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair

        tmpDir = resultsDir + "/tempFile/varscan2_" + sampleID
        outputFile = tmpDir + "/" + sampleID + ".merge.vcf"
        indelFile = tmpDir + "/" + sampleID + ".indel.fix.vcf"
        snpFile = tmpDir + "/" + sampleID + ".snp.fix.vcf"

        def swap_sample_columns(record):
            # Keep the nine fixed VCF fields, then emit field 11 before
            # field 10 so the column order matches the renamed header.
            fields = record.rstrip("\n").split("\t")
            return "\t".join(fields[:9] + [fields[10], fields[9]]) + "\n"

        # Context managers close all three handles even if parsing fails.
        with open(outputFile, "w") as output, \
                open(indelFile, "r") as indel, \
                open(snpFile, "r") as snp:
            for line in indel:
                if line.startswith("#"):
                    if "NORMAL\tTUMOR" in line:
                        line = line.replace("NORMAL\tTUMOR", sampleID + "\t" + pairID)
                    output.write(line)
                elif line.strip():
                    # Blank lines are skipped; they used to crash the merge.
                    output.write(swap_sample_columns(line))

            # The SNP header is dropped; only its records are appended.
            for line in snp:
                if not line.startswith("#") and line.strip():
                    output.write(swap_sample_columns(line))

        tmp = tmpDir + "/tmp"
        mkdir(tmp)
        cmd = """
            bcftools sort {outputFile} -O v -o {tmpDir}/{sampleID}.vcf -T {tmp}
            cp {tmpDir}/{sampleID}.vcf {resultsDir}/vcf/{sampleID}_{pairID}.vcf
        """.format(resultsDir=resultsDir, outputFile=outputFile, tmpDir=tmpDir, sampleID=sampleID, pairID=pairID, tmp=tmp)
        print(cmd)
        os.system(cmd)
예제 #17
0
    def gemini(self):
        """Run GeminiMulti on the sample BAM and, if present, the paired BAM.

        Each ``<output>/bam/<id>.bam`` is processed into its own
        ``<output>/tempFile/gemini_<id>`` folder, using ``self.databases`` as
        the genome folder.
        """
        gemini_multi = "/home/bioinfo/ubuntu/software/GeminiMulti_5.2.10.49/GeminiMulti.dll"

        databases = self.databases
        resultsDir = self.output
        threads = self.threads

        # The sample is always processed; the paired sample goes through the
        # identical steps afterwards when present.
        sampleIDs = [self.sample]
        if self.pair is not None:
            sampleIDs.append(self.pair)

        for sampleID in sampleIDs:
            tmpDir = resultsDir + "/tempFile/gemini_" + sampleID
            mkdir(tmpDir)
            cmd = """
                dotnet {gemini_multi} -bam {resultsDir}/bam/{sampleID}.bam \\
                    -genome {databases} \\
                    --outFolder {tmpDir} \\
                    --numprocesses {threads} \\
                    --samtools /home/bioinfo/ubuntu/software/samtools-1.11
            """.format(gemini_multi=gemini_multi,
                       resultsDir=resultsDir,
                       sampleID=sampleID,
                       databases=databases,
                       tmpDir=tmpDir,
                       threads=threads)
            print(cmd)
            os.system(cmd)
예제 #18
0
    def gatk_filter(self):
        """Estimate contamination and filter the Mutect2 calls of the sample.

        Runs ``GetPileupSummaries`` and ``CalculateContamination``, applies
        ``FilterMutectCalls`` to ``<sample>.m2.vcf`` in
        ``<output>/tempFile/gatk_<sample>``, keeps records whose FILTER is
        ``PASS``, ``clustered_events`` or ``slippage``, and copies the result
        to ``<output>/vcf/<sample>.vcf``.  ``<sample>.BQSR.bam`` is used as
        the input BAM when it exists under ``<output>/bam``, otherwise
        ``<sample>.bam``.
        """
        small_exac = self.databases + "/" + self.runningInfo["setting"]["Mutation"]["gatk_filter"]["small_exac"]
        bedFile = self.bed
        reference = self.reference
        resultsDir = self.output
        sampleID = self.sample

        # Prefer the recalibrated BAM when an earlier step produced one.
        bamFile = sampleID + ".BQSR.bam"
        if not os.path.exists(resultsDir + "/bam/" + bamFile):
            bamFile = sampleID + ".bam"

        # Without a BED file the command used to contain "-L None"; fall back
        # to the variant resource itself as the interval list.
        intervals = bedFile if bedFile is not None else small_exac

        tmpDir = resultsDir + "/tempFile/gatk_" + sampleID
        mkdir(tmpDir)

        cmd = """
            gatk GetPileupSummaries \\
                -I {resultsDir}/bam/{bamFile} \\
                -O {tmpDir}/{sampleID}.pileups.table \\
                -V {small_exac} \\
                -L {intervals} \\
                -R {reference}

            gatk CalculateContamination \\
                -I {tmpDir}/{sampleID}.pileups.table \\
                -O {tmpDir}/{sampleID}.contamination.table

            gatk FilterMutectCalls \\
                -R {reference} \\
                -V {tmpDir}/{sampleID}.m2.vcf \\
                -O {tmpDir}/{sampleID}.m2.contFiltered.vcf \\
                --contamination-table {tmpDir}/{sampleID}.contamination.table

            bcftools view \\
                {tmpDir}/{sampleID}.m2.contFiltered.vcf \\
                -f PASS,clustered_events,slippage \\
                > {tmpDir}/{sampleID}.filter.vcf
            cp {tmpDir}/{sampleID}.filter.vcf {resultsDir}/vcf/{sampleID}.vcf
        """.format(tmpDir=tmpDir, bamFile=bamFile, resultsDir=resultsDir, sampleID=sampleID, small_exac=small_exac, intervals=intervals, reference=reference)
        print(cmd)
        os.system(cmd)
예제 #19
0
    def hlascan(self):
        """Type HLA-A, HLA-B and HLA-C with HLAscan and publish the result.

        Sets ``self.tmpDir`` to ``<output>/tempFile/hlascan_<sample>``, calls
        ``self.extractHLA()``, runs HLAscan once per gene on the
        ``*.HLA.R1/R2.fastq`` files there, concatenates the three reports
        into ``<id>.hlascan.txt`` and copies it to ``<output>/HLA/``.
        ``<id>`` is the paired sample when one is configured, otherwise the
        sample itself.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        threads = self.threads
        hla_scan = "/home/bioinfo/ubuntu/software/HLAscan/hla_scan_r_v2.1.4"
        hla_db = "/home/bioinfo/ubuntu/software/HLAscan/HLA-ALL.IMGT"

        # The work directory is always named after the primary sample.
        tmpDir = resultsDir + "/tempFile/hlascan_" + sampleID
        self.tmpDir = tmpDir
        mkdir(tmpDir)
        # NOTE(review): extractHLA presumably writes the HLA fastq files into
        # self.tmpDir - confirm against its definition.
        self.extractHLA()

        # With a paired sample configured, typing runs on that sample's reads.
        if pairID is not None:
            sampleID = pairID

        print("开始进行HLA分型")
        cmd = """
            {hla_scan} -l {tmpDir}/{sampleID}.HLA.R1.fastq \\
                -r {tmpDir}/{sampleID}.HLA.R2.fastq \\
                -t {threads} \\
                -d {hla_db} -g HLA-A > {tmpDir}/{sampleID}.HLA-A.txt
            {hla_scan} -l {tmpDir}/{sampleID}.HLA.R1.fastq \\
                -r {tmpDir}/{sampleID}.HLA.R2.fastq \\
                -t {threads} \\
                -d {hla_db} -g HLA-B > {tmpDir}/{sampleID}.HLA-B.txt
            {hla_scan} -l {tmpDir}/{sampleID}.HLA.R1.fastq \\
                -r {tmpDir}/{sampleID}.HLA.R2.fastq \\
                -t {threads} \\
                -d {hla_db} -g HLA-C > {tmpDir}/{sampleID}.HLA-C.txt
            cat {tmpDir}/{sampleID}.HLA-A.txt {tmpDir}/{sampleID}.HLA-B.txt {tmpDir}/{sampleID}.HLA-C.txt \\
                > {tmpDir}/{sampleID}.hlascan.txt
            cp {tmpDir}/{sampleID}.hlascan.txt {resultsDir}/HLA/
        """.format(hla_scan=hla_scan,
                   hla_db=hla_db,
                   tmpDir=tmpDir,
                   sampleID=sampleID,
                   threads=threads,
                   resultsDir=resultsDir)
        print(cmd)
        os.system(cmd)
예제 #20
0
    def __init__(self, runningInfo):
        """Store run settings for the SV/fusion steps and pick the input BAMs.

        ``runningInfo`` is the nested run-configuration mapping.  ``self.bam``
        (and ``self.normal`` when a paired sample is configured) default to
        ``<output>/bam/<id>.bam``.  When ``setting.QC.UMI_loc`` is set and the
        sample's sorted BAM from the bwa work directory exists, the sorted
        BAMs under ``<output>/tempFile/bwa_<id>/`` are used instead.  Also
        creates the output, ``tempFile`` and ``Fusion`` folders.
        """
        self.runningInfo = runningInfo
        self.sample = runningInfo["sample"]
        self.pair = runningInfo["pair"]
        self.rawdata = runningInfo["rawdata"]
        self.output = runningInfo["output"]

        self.threads = str(runningInfo["process"]["threads"])
        self.runApp = runningInfo["process"]["Mutation"]["SV"]
        self.reference = runningInfo["setting"]["Mapping"]["reference"]
        self.bed = runningInfo["setting"]["Mutation"]["Bed"]

        # Default inputs: the published BAMs.
        self.bam = self.output + "/bam/" + self.sample + ".bam"
        if self.pair is not None:
            self.normal = self.output + "/bam/" + self.pair + ".bam"

        if runningInfo["setting"]["QC"]["UMI_loc"] is not None:
            sortedBam = self.output + "/tempFile/bwa_" + self.sample + "/" + self.sample + ".sort.bam"
            # Only switch when the sample's sorted BAM is actually there.
            if os.path.exists(sortedBam):
                self.bam = sortedBam
                if self.pair is not None:
                    # The file inside the pair's folder is named after the
                    # pair; this path used to be built with the sample name.
                    self.normal = self.output + "/tempFile/bwa_" + self.pair + "/" + self.pair + ".sort.bam"

        mkdir(self.output)
        mkdir(self.output + "/tempFile")
        mkdir(self.output + "/Fusion")
예제 #21
0
파일: qc.py 프로젝트: pzweuj/DNApipeline
    def fastqc(self):
        """Run FastQC on the raw reads of the sample and the paired sample.

        For each ID, ``<rawdata>/<id>_R1.fastq.gz`` and ``_R2.fastq.gz`` are
        analysed and the reports are written to
        ``<output>/tempFile/fastqc_<id>``.
        """
        rawdataDir = self.rawdata
        resultsDir = self.output
        threads = self.threads

        # The sample is always processed; the paired sample goes through the
        # identical steps afterwards when present.
        sampleIDs = [self.sample]
        if self.pair is not None:
            sampleIDs.append(self.pair)

        for sampleID in sampleIDs:
            tmpDir = resultsDir + "/tempFile/fastqc_" + sampleID
            mkdir(tmpDir)

            cmd = """
                fastqc \\
                    {rawdataDir}/{sampleID}_R1.fastq.gz \\
                    {rawdataDir}/{sampleID}_R2.fastq.gz \\
                    -t {threads} -o {tmpDir}
            """.format(rawdataDir=rawdataDir,
                       sampleID=sampleID,
                       threads=threads,
                       tmpDir=tmpDir)
            print(cmd)
            os.system(cmd)
예제 #22
0
파일: loh.py 프로젝트: pzweuj/DNApipeline
    def __init__(self, runningInfo):
        """Store run settings for the HLA/LOH steps and create output folders.

        Reads ``sample``, ``pair``, ``rawdata``, ``output``,
        ``process.threads`` and ``setting.Annotation.buildver`` from
        ``runningInfo`` and creates the output, ``tempFile``, ``HLA`` and
        ``LOH`` folders.
        """
        self.runningInfo = runningInfo
        for key in ("sample", "pair", "rawdata", "output"):
            setattr(self, key, runningInfo[key])

        # The thread count is stored as a string.
        self.threads = str(runningInfo["process"]["threads"])
        self.buildver = runningInfo["setting"]["Annotation"]["buildver"]

        outRoot = self.output
        for subDir in ("", "/tempFile", "/HLA", "/LOH"):
            mkdir(outRoot + subDir)
예제 #23
0
    def factera(self):
        """Run FACTERA on the sample BAM.

        Output goes to ``<output>/tempFile/factera_<sample>``.
        """
        outDir = self.output
        sample = self.sample
        workDir = outDir + "/tempFile/factera_" + sample
        mkdir(workDir)

        template = """
            {factera} -F -p {threads} \\
                -o {tmpDir} \\
                {resultsDir}/bam/{sampleID}.bam \\
                {exonBed} \\
                {referenceTwoBit}
        """
        # NOTE(review): exonBed and referenceTwoBit are not assigned anywhere
        # in this method; unless they exist as module-level names this raises
        # NameError - confirm where they are supposed to come from.
        fields = dict(
            factera="/home/bioinfo/ubuntu/software/factera/factera.pl",
            resultsDir=outDir,
            sampleID=sample,
            exonBed=exonBed,
            referenceTwoBit=referenceTwoBit,
            tmpDir=workDir,
            threads=self.threads,
        )
        cmd = template.format(**fields)
        print(cmd)
        os.system(cmd)
예제 #24
0
파일: qc.py 프로젝트: pzweuj/DNApipeline
    def __init__(self, runningInfo):
        """Store run settings for the QC steps and create output folders.

        Reads ``sample``, ``pair``, ``rawdata``, ``output``,
        ``process.threads``, ``process.QC`` and the ``UMI_loc`` / ``UMI_len``
        entries of ``setting.QC`` from ``runningInfo``, then creates the
        output, ``QC``, ``cleandata`` and ``tempFile`` folders.
        """
        self.runningInfo = runningInfo
        for key in ("sample", "pair", "rawdata", "output"):
            setattr(self, key, runningInfo[key])

        # The thread count is stored as a string.
        self.threads = str(runningInfo["process"]["threads"])
        self.runApp = runningInfo["process"]["QC"]

        for key in ("UMI_loc", "UMI_len"):
            setattr(self, key, runningInfo["setting"]["QC"][key])

        outRoot = self.output
        for subDir in ("", "/QC", "/cleandata", "/tempFile"):
            mkdir(outRoot + subDir)
예제 #25
0
    def gatk_haplotypecaller(self):
        """Run GATK HaplotypeCaller on the sample BAM.

        Creates ``<output>/tempFile/HaplotypeCaller_<sample>``, runs
        HaplotypeCaller on ``<output>/bam/<sample>.bam`` writing
        ``<sample>.htc.vcf`` into that folder, and copies the result to
        ``<output>/vcf/<sample>.vcf``. The shell command is printed and then
        executed through ``os.system``; its exit status is not inspected.
        """
        reference = self.reference
        resultsDir = self.output
        sampleID = self.sample
        bedFile = self.bed
        threads = self.threads

        tmpDir = resultsDir + "/tempFile/HaplotypeCaller_" + sampleID
        mkdir(tmpDir)

        # Without a BED file the literal string "null" is handed to -L.
        # NOTE(review): presumably relies on GATK treating "null" as
        # "no interval restriction" -- confirm against the GATK version in use.
        if bedFile is None:
            bedFile = "null"
        cmd = """
            gatk HaplotypeCaller \\
                -R {reference} \\
                -I {resultsDir}/bam/{sampleID}.bam \\
                -O {tmpDir}/{sampleID}.htc.vcf \\
                -L {bedFile} \\
                --native-pair-hmm-threads {threads}
            cp {tmpDir}/{sampleID}.htc.vcf {resultsDir}/vcf/{sampleID}.vcf
        """.format(reference=reference, resultsDir=resultsDir, tmpDir=tmpDir, sampleID=sampleID, bedFile=bedFile, threads=threads)
        print(cmd)
        os.system(cmd)
예제 #26
0
    def __init__(self, runningInfo):
        """Keep the run configuration and prepare the annotation folders.

        Copies the sample/pair identifiers, the raw-data and output locations
        and the thread count (stored as a string) out of ``runningInfo``, then
        calls ``mkdir`` on the output directory and on its ``tempFile`` and
        ``annotation`` subdirectories.
        """
        self.runningInfo = runningInfo
        self.sample = runningInfo["sample"]
        self.pair = runningInfo["pair"]
        self.rawdata = runningInfo["rawdata"]
        self.output = runningInfo["output"]

        processSection = runningInfo["process"]
        self.threads = str(processSection["threads"])

        # Output root first, then every subdirectory this stage writes into.
        outputDir = self.output
        mkdir(outputDir)
        for subDir in ("/tempFile", "/annotation"):
            mkdir(outputDir + subDir)
예제 #27
0
    def __init__(self, runningInfo):
        """Keep the run configuration and prepare the TMB output folders.

        Copies the sample identifier, the raw-data and output locations, the
        ``Other -> TMB`` entry of the process section, the ``PanelSize``
        setting and the annotation build version out of ``runningInfo``, then
        calls ``mkdir`` on the output directory and on its ``tempFile`` and
        ``TMB`` subdirectories.
        """
        self.runningInfo = runningInfo
        self.sample = runningInfo["sample"]
        self.rawdata = runningInfo["rawdata"]
        self.output = runningInfo["output"]

        otherProcess = runningInfo["process"]["Other"]
        self.runApp = otherProcess["TMB"]
        otherSetting = runningInfo["setting"]["Other"]
        self.panelSize = otherSetting["PanelSize"]
        annotationSetting = runningInfo["setting"]["Annotation"]
        self.buildver = annotationSetting["buildver"]

        # Output root first, then every subdirectory this stage writes into.
        outputDir = self.output
        mkdir(outputDir)
        for subDir in ("/tempFile", "/TMB"):
            mkdir(outputDir + subDir)
예제 #28
0
파일: msi.py 프로젝트: pzweuj/DNApipeline
    def __init__(self, runningInfo):
        """Keep the run configuration and prepare the MSI output folders.

        Copies the sample/pair identifiers, the raw-data and output locations,
        the thread count (stored as a string) and the ``Other -> MSI`` entry of
        the process section out of ``runningInfo``, then calls ``mkdir`` on the
        output directory and on its ``tempFile`` and ``msi`` subdirectories.
        """
        self.runningInfo = runningInfo
        self.sample = runningInfo["sample"]
        self.rawdata = runningInfo["rawdata"]
        self.pair = runningInfo["pair"]
        self.output = runningInfo["output"]

        processSection = runningInfo["process"]
        self.threads = str(processSection["threads"])
        self.runApp = processSection["Other"]["MSI"]

        # Output root first, then every subdirectory this stage writes into.
        outputDir = self.output
        mkdir(outputDir)
        for subDir in ("/tempFile", "/msi"):
            mkdir(outputDir + subDir)
예제 #29
0
    def gencore(self):
        """Run gencore on the sample BAM and, when set, on the paired BAM.

        For each ID (the sample first, then the pair if ``self.pair`` is not
        ``None``) this:

        1. creates ``<output>/tempFile/gencore_<id>`` and a ``tmp`` scratch
           folder inside it,
        2. runs gencore on ``<output>/bam/<id>.bam`` producing
           ``<id>.umi.bam`` plus JSON/HTML reports,
        3. runs ``sambamba sort`` on the result using the scratch folder,
        4. copies the sorted BAM and its ``.bai`` file back over
           ``<output>/bam/<id>.bam`` / ``<id>.bam.bai``,
        5. removes the scratch folder.

        Each shell command is printed and executed through ``os.system``; the
        exit status is not inspected.
        """
        resultsDir = self.output
        sampleID = self.sample
        pairID = self.pair
        threads = self.threads
        reference = self.reference

        bamIDs = [sampleID]
        if pairID is not None:
            bamIDs.append(pairID)

        for bamID in bamIDs:
            tmpDir = resultsDir + "/tempFile/gencore_" + bamID
            mkdir(tmpDir)
            # Every run gets its own freshly created scratch directory.
            # Previously the paired run was pointed at the sample's scratch
            # directory, which the sample command had already removed with
            # `rm -rf`.
            tmp = tmpDir + "/tmp"
            mkdir(tmp)
            cmd = """
            gencore -i {resultsDir}/bam/{bamID}.bam \\
                -r {reference} \\
                -o {tmpDir}/{bamID}.umi.bam \\
                -u UMI -s 2 -d 1 \\
                -j {tmpDir}/{bamID}.json -h {tmpDir}/{bamID}.html
            sambamba sort -t {threads} {tmpDir}/{bamID}.umi.bam -o {tmpDir}/{bamID}.umi.sort.bam --tmpdir {tmp} -p
            cp {tmpDir}/{bamID}.umi.sort.bam {resultsDir}/bam/{bamID}.bam
            cp {tmpDir}/{bamID}.umi.sort.bam.bai {resultsDir}/bam/{bamID}.bam.bai
            rm -rf {tmp}
        """.format(reference=reference,
                   resultsDir=resultsDir,
                   bamID=bamID,
                   tmpDir=tmpDir,
                   threads=threads,
                   tmp=tmp)
            print(cmd)
            os.system(cmd)
예제 #30
0
    def __init__(self, runningInfo):
        """Keep the run configuration and prepare the CNV output folders.

        Copies the sample/pair identifiers, the raw-data and output locations,
        the mapping reference, the thread count (stored as a string) and the
        ``Mutation -> CNV`` entry of the process section out of
        ``runningInfo``, then calls ``mkdir`` on the output directory and on
        its ``tempFile`` and ``cnv`` subdirectories.
        """
        self.runningInfo = runningInfo
        self.sample = runningInfo["sample"]
        self.pair = runningInfo["pair"]
        self.rawdata = runningInfo["rawdata"]
        self.output = runningInfo["output"]

        mappingSetting = runningInfo["setting"]["Mapping"]
        self.reference = mappingSetting["reference"]
        processSection = runningInfo["process"]
        self.threads = str(processSection["threads"])
        self.runApp = processSection["Mutation"]["CNV"]

        # Output root first, then every subdirectory this stage writes into.
        outputDir = self.output
        mkdir(outputDir)
        for subDir in ("/tempFile", "/cnv"):
            mkdir(outputDir + subDir)