def varscan2(self):
    """Call somatic SNVs and indels for a sample pair with VarScan2.

    Streams a two-BAM ``samtools mpileup`` (the paired sample's BAM first,
    then the sample's own BAM) into ``VarScan somatic -mpileup`` and rewrites
    the headers of the resulting ``.snp.vcf`` / ``.indel.vcf`` with
    ``bcftools reheader`` using the reference ``.fai``.  All files are written
    under ``<output>/tempFile/varscan2_<sample>/``.

    The minimum coverage passed to VarScan for both samples is
    ``int(filtDP * MAF)``.

    Raises:
        SystemExit: with status 1 when no paired sample is configured.
    """
    pairID = self.pair
    # Paired-only caller: validate before touching the filesystem and exit
    # non-zero so a calling script can detect the failure.
    # (Message: "varscan2 only supports paired analysis; specify a paired
    # sample and rerun.")
    if pairID is None:
        print("varscan2仅适用于配对分析,请指定配对样本并重新运行")
        raise SystemExit(1)

    reference = self.reference
    resultsDir = self.output
    sampleID = self.sample
    MAF = self.MAF
    # Depth threshold scaled by the allele-frequency threshold, truncated.
    minCov = str(int(int(self.filtDP) * float(MAF)))

    tmpDir = resultsDir + "/tempFile/varscan2_" + sampleID
    mkdir(tmpDir)

    vs = "/home/bioinfo/ubuntu/software/VarScan.v2.3.9/VarScan.v2.3.9.jar"
    cmd = """
        samtools mpileup -B -f {reference} -q 15 -d 10000 \\
            {resultsDir}/bam/{pairID}.bam {resultsDir}/bam/{sampleID}.bam \\
            | java -jar {vs} somatic -mpileup {tmpDir}/{sampleID} \\
            --min-coverage-normal {minCov} --min-coverage-tumor {minCov} \\
            --min-var-freq {MAF} --strand-filter 1 --output-vcf
        bcftools reheader -f {reference}.fai {tmpDir}/{sampleID}.indel.vcf -o {tmpDir}/{sampleID}.indel.fix.vcf
        bcftools reheader -f {reference}.fai {tmpDir}/{sampleID}.snp.vcf -o {tmpDir}/{sampleID}.snp.fix.vcf
    """.format(reference=reference, resultsDir=resultsDir, pairID=pairID,
               sampleID=sampleID, vs=vs, tmpDir=tmpDir, minCov=minCov, MAF=MAF)
    print(cmd)
    os.system(cmd)
def annovar(self):
    """Annotate the snpEff output VCF with ANNOVAR.

    Runs ``self.snpeff()`` first, converts
    ``<output>/annotation/<sample>.snpeff.vcf`` to ANNOVAR input with
    ``convert2annovar.pl``, runs ``table_annovar.pl`` against the configured
    database directory and build, and copies the resulting
    ``<sample>.<buildver>_multianno.txt`` into ``<output>/annotation/``.
    """
    annotation_cfg = self.runningInfo["setting"]["Annotation"]
    db_dir = annotation_cfg["humandb"]
    build = annotation_cfg["buildver"]
    out_dir = self.output
    sample = self.sample
    n_threads = self.threads

    work_dir = out_dir + "/tempFile/annovar_" + sample
    mkdir(work_dir)

    # ANNOVAR consumes the VCF produced by the snpEff step.
    self.snpeff()

    template = """
        convert2annovar.pl -format vcf4 \\
            {out}/annotation/{sample}.snpeff.vcf \\
            --includeinfo > {work}/{sample}.avinput
        table_annovar.pl {work}/{sample}.avinput \\
            {humandb} -buildver {buildver} \\
            -out {work}/{sample} -remove \\
            -protocol refGene,avsnp150,gnomad211_genome,clinvar_20210308,JaxCkb,Civic,OncoKB,dbnsfp41a,cosmic92_coding,intervar_20180118 \\
            -operation g,f,f,f,f,f,f,f,f,f \\
            -nastring - -thread {threads} -otherinfo
        cp {work}/{sample}.{buildver}_multianno.txt {out}/annotation/
    """
    cmd = template.format(
        out=out_dir,
        sample=sample,
        work=work_dir,
        humandb=db_dir,
        buildver=build,
        threads=n_threads,
    )
    print(cmd)
    os.system(cmd)
def msisensor_pro(self):
    """Run MSIsensor-pro and copy its report to ``<output>/msi/``.

    Without a paired sample the tumor-only ``pro`` sub-command is run on the
    sample BAM; with a paired sample the ``msi`` sub-command is run with the
    paired BAM as ``-n`` and the sample BAM as ``-t``.  The tool's output
    (written without extension) is renamed to ``<sample>.txt`` and copied to
    ``<output>/msi/<sample>.MSIsensorp.txt``.
    """
    resultsDir = self.output
    sampleID = self.sample
    pairID = self.pair
    other_cfg = self.runningInfo["setting"]["Other"]
    msi_baseline = other_cfg["msisensorpro_baseline"]
    msi_list = other_cfg["msi_list"]

    tmpDir = resultsDir + "/tempFile/msisensorpro_" + sampleID
    mkdir(tmpDir)

    if pairID is None:
        # Tumor-only "pro" mode is documented to take the baseline file as
        # -d.  The configured baseline used to be read and then ignored in
        # favour of the plain microsatellite list.
        # NOTE(review): confirm `msisensorpro_baseline` points at the
        # *.list_baseline produced by `msisensor-pro baseline`.
        cmd = """
            msisensor-pro pro -d {msi_baseline} \\
                -t {resultsDir}/bam/{sampleID}.bam \\
                -o {tmpDir}/{sampleID}
            mv {tmpDir}/{sampleID} {tmpDir}/{sampleID}.txt
            cp {tmpDir}/{sampleID}.txt {resultsDir}/msi/{sampleID}.MSIsensorp.txt
        """.format(msi_baseline=msi_baseline, resultsDir=resultsDir,
                   sampleID=sampleID, tmpDir=tmpDir)
    else:
        cmd = """
            msisensor-pro msi -d {msi_list} \\
                -n {resultsDir}/bam/{pairID}.bam \\
                -t {resultsDir}/bam/{sampleID}.bam \\
                -o {tmpDir}/{sampleID}
            mv {tmpDir}/{sampleID} {tmpDir}/{sampleID}.txt
            cp {tmpDir}/{sampleID}.txt {resultsDir}/msi/{sampleID}.MSIsensorp.txt
        """.format(msi_list=msi_list, resultsDir=resultsDir, pairID=pairID,
                   sampleID=sampleID, tmpDir=tmpDir)
    print(cmd)
    os.system(cmd)
def bcftools(self):
    """Call variants on the sample BAM with ``bcftools mpileup | bcftools call``.

    The caller runs with ``-mv -O v`` and writes
    ``<output>/tempFile/bcftools_<sample>/<sample>.bcftools.vcf``.  When a BED
    file is configured (``self.bed``), calling is restricted to it.
    """
    reference = self.reference
    resultsDir = self.output
    sampleID = self.sample
    bedFile = self.bed
    threads = self.threads

    tmpDir = resultsDir + "/tempFile/bcftools_" + sampleID
    mkdir(tmpDir)

    # `bcftools call` reads a stream from the pipe, so the BED restriction has
    # to use the streaming targets option (-T); -R relies on an index.
    target_opt = "" if bedFile is None else "-T {}".format(bedFile)

    # Previously the template used `-t {threads}` (which bcftools parses as
    # --targets) and `threads` was never passed to format(), raising KeyError.
    cmd = """
        bcftools mpileup -f {reference} \\
            {resultsDir}/bam/{sampleID}.bam \\
            | bcftools call -mv -O v \\
            -o {tmpDir}/{sampleID}.bcftools.vcf \\
            --threads {threads} {target_opt}
    """.format(reference=reference, resultsDir=resultsDir, sampleID=sampleID,
               tmpDir=tmpDir, threads=threads, target_opt=target_opt)
    print(cmd)
    os.system(cmd)
def hlahd(self):
    """Run HLA-HD on the extracted HLA reads and copy the final result.

    The working directory is ``<output>/tempFile/hlahd_<sample>`` (also stored
    on ``self.tmpDir`` before ``self.extractHLA()`` is called).  The FASTQ
    pair that is typed is named after the paired sample when one is
    configured, otherwise after the sample itself.  The result is copied to
    ``<output>/HLA/<id>.hlahd.txt``.
    """
    out_dir = self.output
    sample = self.sample
    pair = self.pair
    n_threads = self.threads

    # The resources below are generic (independent of reference-genome
    # coordinates), so they are hard-coded rather than read from the config.
    freq = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/freq_data"
    dictionary = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/dictionary"
    split_file = "/home/bioinfo/ubuntu/software/hlahd.1.3.0/HLA_gene.ABC.txt"

    work_dir = out_dir + "/tempFile/hlahd_" + sample
    self.tmpDir = work_dir
    mkdir(work_dir)
    self.extractHLA()

    # Type the paired sample when there is one.
    hla_id = sample if pair is None else pair

    template = """
        hlahd.sh -t {threads} -m 100 -c 0.95 -f {freq} \\
            {work}/{hla_id}.HLA.R1.fastq {work}/{hla_id}.HLA.R2.fastq \\
            {split_file} {dictionary} {hla_id} {work}
        cp {work}/{hla_id}/result/{hla_id}_final.result.txt {out}/HLA/{hla_id}.hlahd.txt
    """
    cmd = template.format(
        threads=n_threads,
        freq=freq,
        split_file=split_file,
        dictionary=dictionary,
        work=work_dir,
        hla_id=hla_id,
        out=out_dir,
    )
    print(cmd)
    os.system(cmd)
def optitype(self):
    """Run OptiType on the extracted HLA reads and copy the result table.

    The working directory is ``<output>/tempFile/optitype_<sample>`` (stored
    on ``self.tmpDir`` before ``self.extractHLA()`` runs).  Reads are taken
    from the paired sample's FASTQ files when a pair is configured, otherwise
    from the sample's.  The ``_result.tsv`` file is copied to
    ``<output>/HLA/<id>.optitype.txt``.
    """
    out_dir = self.output
    sample = self.sample
    pair = self.pair
    optipipe = "/home/bioinfo/ubuntu/software/OptiType-1.3.5/OptiTypePipeline.py"

    work_dir = out_dir + "/tempFile/optitype_" + sample
    self.tmpDir = work_dir
    mkdir(work_dir)
    self.extractHLA()

    # Type the paired sample when there is one.
    hla_id = sample if pair is None else pair

    template = """
        python {optipipe} \\
            -i {work}/{hla_id}.HLA.R1.fastq {work}/{hla_id}.HLA.R2.fastq \\
            -d -o {work} -p {hla_id} -v
        cp {work}/{hla_id}_result.tsv {out}/HLA/{hla_id}.optitype.txt
    """
    cmd = template.format(optipipe=optipipe, work=work_dir, hla_id=hla_id, out=out_dir)
    print(cmd)
    os.system(cmd)
def seq2hla(self):
    """Run seq2HLA on the extracted HLA reads and copy the class I genotype.

    The working directory is ``<output>/tempFile/seq2hla_<sample>`` (stored on
    ``self.tmpDir`` before ``self.extractHLA()`` runs).  The paired sample's
    FASTQ files are used when a pair is configured, otherwise the sample's.
    The ``-ClassI-class.HLAgenotype4digits`` file is copied to
    ``<output>/HLA/<id>.seq2HLA.txt``.
    """
    out_dir = self.output
    sample = self.sample
    pair = self.pair
    n_threads = self.threads

    work_dir = out_dir + "/tempFile/seq2hla_" + sample
    self.tmpDir = work_dir
    mkdir(work_dir)
    self.extractHLA()

    # Type the paired sample when there is one.
    hla_id = sample if pair is None else pair

    s2h = "/home/bioinfo/ubuntu/software/seq2HLA/seq2HLA.py"
    template = """
        python {s2h} \\
            -1 {work}/{hla_id}.HLA.R1.fastq \\
            -2 {work}/{hla_id}.HLA.R2.fastq \\
            -r {work}/{hla_id} -p {threads}
        cp {work}/{hla_id}-ClassI-class.HLAgenotype4digits {out}/HLA/{hla_id}.seq2HLA.txt
    """
    cmd = template.format(s2h=s2h, work=work_dir, hla_id=hla_id,
                          threads=n_threads, out=out_dir)
    print(cmd)
    os.system(cmd)  # end
def snpeff(self):
    """Annotate the sample's VCF with snpEff.

    The input is ``<output>/vcf/<sample>.vcf`` when no pair is configured and
    ``<output>/vcf/<sample>.filter.vcf`` otherwise.  The annotated VCF and the
    HTML summary are written to ``<output>/tempFile/snpeff_<sample>/`` and the
    VCF is copied into ``<output>/annotation/``.
    """
    build = self.runningInfo["setting"]["Annotation"]["buildver"]
    out_dir = self.output
    sample = self.sample

    # Paired runs annotate the filtered call set.
    suffix = ".vcf" if self.pair is None else ".filter.vcf"
    input_vcf = out_dir + "/vcf/" + sample + suffix

    work_dir = out_dir + "/tempFile/snpeff_" + sample
    mkdir(work_dir)

    template = """
        java -jar /home/bioinfo/ubuntu/software/snpEff/snpEff.jar \\
            -c /home/bioinfo/ubuntu/software/snpEff/snpEff.config \\
            -s {work}/{sample}.summary.html \\
            {buildver} {vcf} > {work}/{sample}.snpeff.vcf
        cp {work}/{sample}.snpeff.vcf {out}/annotation/
    """
    cmd = template.format(work=work_dir, sample=sample, buildver=build,
                          vcf=input_vcf, out=out_dir)
    print(cmd)
    os.system(cmd)
def neopredpipe(self):
    """Predict neoantigens with NeoPredPipe from the filtered VCF.

    Copies ``<output>/vcf/<sample>.filter.vcf`` to
    ``<work>/vcf/<sample>.vcf``, calls ``self.extractHLAResults()``, runs
    NeoPredPipe with ``<work>/<sample>.hlas``, prepends a header line to
    ``<sample>.neoantigens.txt`` with ``sed`` and copies the resulting ``.tsv``
    into ``<output>/Neoantigen/``.  ``<work>`` is
    ``<output>/tempFile/Neoantigen_<sample>``.
    """
    sample = self.sample
    out_dir = self.output
    neopred = "/home/bioinfo/ubuntu/software/NeoPredPipe-1.1/NeoPredPipe.py"

    work_dir = out_dir + "/tempFile/Neoantigen_" + sample
    mkdir(work_dir)

    # NeoPredPipe takes a directory of VCFs, so stage the single input there.
    source_vcf = out_dir + "/vcf/" + sample + ".filter.vcf"
    staged_dir = work_dir + "/vcf"
    mkdir(staged_dir)
    shutil.copy(source_vcf, staged_dir + "/" + sample + ".vcf")

    self.extractHLAResults()

    template = """
        python {neopred} \\
            -I {work}/vcf \\
            -H {work}/{sample}.hlas \\
            -o {work} \\
            -n {sample} \\
            -c 0
        sed '1iSample\\tR\\tLine\\tchrom\\tallelepos\\tref\\talt\\tGeneName\\tpos\\thla\\tpeptide\\tcore\\tOf\\tGp\\tGl\\tIp\\tIl\\tIcore\\tIdentity\\tScore_EL\\t%Rank_EL\\tScore_BA\\t%Rank_BA\\tAff(nM)\\tCandidate\\tBindLevel\\tNovelty' \\
            {work}/{sample}.neoantigens.txt > {work}/{sample}.neoantigens.tsv
        cp {work}/{sample}.neoantigens.tsv {out}/Neoantigen/
    """
    cmd = template.format(neopred=neopred, work=work_dir, sample=sample, out=out_dir)
    print(cmd)
    os.system(cmd)
def pisces(self):
    """Call variants with Pisces, then filter the genome VCF with bcftools.

    The genome folder must be indexed beforehand, e.g.::

        dotnet CreateGenomeSizeFile.dll -g hg19/ -s "Homo sapiens (UCSC hg19)" -o hg19/

    Pisces writes into ``<output>/tempFile/pisces_<sample>/``; ``-i <bed>`` is
    passed only when ``self.bed`` is set.  Afterwards two ``bcftools view -e``
    passes exclude records with genotype ``0/0``, ``./.`` or ``0/.`` and
    records whose FILTER is ``LowDP``; the result is copied to
    ``<output>/vcf/<sample>.vcf``.
    """
    genome_dir = self.databases
    out_dir = self.output
    sample = self.sample
    bed = self.bed
    n_threads = self.threads
    min_dp = self.filtDP
    min_vaf = self.MAF
    pisces_dll = "/home/bioinfo/ubuntu/software/Pisces_5.2.10.49/Pisces.dll"

    work_dir = out_dir + "/tempFile/pisces_" + sample
    mkdir(work_dir)

    # Assemble the Pisces invocation one option per continuation line.
    caller_args = [
        "dotnet {} -b {}/bam/{}.bam".format(pisces_dll, out_dir, sample),
        "-g {}".format(genome_dir),
        "-o {}".format(work_dir),
        "-t {}".format(n_threads),
    ]
    if bed is not None:
        caller_args.append("-i {}".format(bed))
    caller_args.extend([
        "--mindp {}".format(min_dp),
        "--minvf {}".format(min_vaf),
        "--minvq 0 --threadbychr true",
    ])
    cmd = "\n" + " \\\n    ".join(caller_args) + "\n"
    print(cmd)
    os.system(cmd)

    filter_template = """
        bcftools view \\
            -e "GT='0/0' | GT='./.' | GT='0/.'" \\
            {work}/{sample}.genome.vcf > {work}/{sample}.muts.vcf
        bcftools view \\
            -e "FILTER='LowDP'" \\
            {work}/{sample}.muts.vcf > {work}/{sample}.pisces.vcf
        cp {work}/{sample}.pisces.vcf {out}/vcf/{sample}.vcf
    """
    filt = filter_template.format(work=work_dir, sample=sample, out=out_dir)
    print(filt)
    os.system(filt)
def TempPurityPloidy(self):
    """Write a placeholder purity/ploidy solutions file for the sample.

    Creates ``<output>/tempFile/LOH_<sample>/<sample>.solutions.txt`` with a
    header line and a single row for the sample carrying the fixed values
    ``2``, ``0.8`` and ``2``.
    """
    sampleID = self.sample
    tmpDir = self.output + "/tempFile/LOH_" + sampleID
    mkdir(tmpDir)

    # The header names three columns while the row has four fields (the
    # sample ID comes first); kept exactly as before.
    # NOTE(review): presumably the consumer reads the first field as a row
    # name -- confirm against the tool that reads this file.
    # The context manager closes the file even if a write fails.
    with open(tmpDir + "/" + sampleID + ".solutions.txt", "w") as solution:
        solution.write("Ploidy\ttumorPurity\ttumorPloidy\n")
        solution.write(sampleID + "\t2\t0.8\t2\n")
def manta(self):
    """Configure and run Manta, then decompress its structural-variant VCF.

    The run directory ``<output>/tempFile/manta_<sample>`` is emptied first.
    Without a pair only ``--tumorBam`` is passed and ``tumorSV.vcf.gz`` is
    extracted; with a pair ``--normalBam self.normal`` is added and
    ``somaticSV.vcf.gz`` is extracted.  The decompressed VCF is written to
    ``<run dir>/<sample>.manta.vcf``.
    """
    manta = "/home/bioinfo/ubuntu/software/manta-1.6.0/bin/configManta.py"
    reference = self.reference
    tumor_bam = self.bam
    sample = self.sample
    pair = self.pair
    n_threads = self.threads

    run_dir = self.output + "/tempFile/manta_" + sample
    mkdir(run_dir)

    config_opts = ["--tumorBam {}".format(tumor_bam)]
    if pair is None:
        sv_vcf = "tumorSV.vcf.gz"
    else:
        config_opts.append("--normalBam {}".format(self.normal))
        sv_vcf = "somaticSV.vcf.gz"
    config_opts.extend([
        "--referenceFasta {}".format(reference),
        "--exome",
        "--generateEvidenceBam",
        "--runDir {}".format(run_dir),
    ])

    steps = [
        "rm -rf {}/*".format(run_dir),
        " \\\n    ".join([manta] + config_opts),
        "{}/runWorkflow.py -j {}".format(run_dir, n_threads),
        "zcat {0}/results/variants/{1} > {0}/{2}.manta.vcf".format(run_dir, sv_vcf, sample),
    ]
    cmd = "\n" + "\n".join(steps) + "\n"
    print(cmd)
    os.system(cmd)
def bwa_mem(self):
    """Align cleaned reads with ``bwa mem`` and publish sorted, indexed BAMs.

    For the sample, and then for the paired sample when one is configured,
    reads ``<output>/cleandata/<id>.clean_R{1,2}.fastq.gz`` are aligned,
    converted to BAM and sorted with sambamba inside
    ``<output>/tempFile/bwa_<id>/``.  The sorted BAM and its ``.bai`` are
    copied to ``<output>/bam/<id>.bam(.bai)``; the unsorted BAM and the
    scratch ``tmp`` directory are removed.
    """
    reference = self.reference
    n_threads = self.threads
    out_dir = self.output

    # Sample first, then the pair -- each gets its own work and scratch dir.
    sample_ids = [self.sample]
    if self.pair is not None:
        sample_ids.append(self.pair)

    template = """
        bwa mem -t {threads} \\
            -M \\
            -R "@RG\\tID:{sid}\\tLB:{sid}\\tPL:illumina\\tPU:Hiseq\\tSM:{sid}" \\
            {reference} \\
            {out}/cleandata/{sid}.clean_R1.fastq.gz \\
            {out}/cleandata/{sid}.clean_R2.fastq.gz \\
            | sambamba view -f bam -t {threads} -S /dev/stdin > {work}/{sid}.bam
        sambamba sort {work}/{sid}.bam -t {threads} -o {work}/{sid}.sort.bam --tmpdir {scratch} -p
        rm {work}/{sid}.bam
        cp {work}/{sid}.sort.bam {out}/bam/{sid}.bam
        cp {work}/{sid}.sort.bam.bai {out}/bam/{sid}.bam.bai
        rm -rf {scratch}
    """

    for sid in sample_ids:
        work_dir = out_dir + "/tempFile/bwa_" + sid
        mkdir(work_dir)
        scratch_dir = work_dir + "/tmp"
        mkdir(scratch_dir)

        cmd = template.format(threads=n_threads, sid=sid, reference=reference,
                              out=out_dir, work=work_dir, scratch=scratch_dir)
        print(cmd)
        os.system(cmd)
def msisensor_ct(self):
    """Run ``msisensor-ct msi`` on the sample BAM.

    The model path comes from ``setting.Other.MSIsensor_ct`` in the run
    configuration; ``-o`` is given ``<output>/tempFile/msisensorct_<sample>``.
    """
    out_dir = self.output
    sample = self.sample
    n_threads = self.threads
    model = self.runningInfo["setting"]["Other"]["MSIsensor_ct"]

    work_dir = out_dir + "/tempFile/msisensorct_" + sample
    mkdir(work_dir)

    template = """
        msisensor-ct msi -D -M {model} -t {out}/bam/{sample}.bam \\
            -o {work} -b {threads}
    """
    cmd = template.format(model=model, out=out_dir, sample=sample,
                          work=work_dir, threads=n_threads)
    print(cmd)
    os.system(cmd)
def markDuplicates(self):
    """Run ``sambamba markdup`` and overwrite the published BAMs.

    Processes the sample and then, when configured, the paired sample.  Each
    ``<output>/bam/<id>.bam`` is processed into
    ``<output>/tempFile/markDups_<id>/<id>.marked.bam`` and then copied back
    (with its ``.bai``) over ``<output>/bam/<id>.bam``.  ``-r`` is appended to
    the command when ``self.removeDups`` is truthy.
    """
    out_dir = self.output
    n_threads = self.threads
    remove_flag = "-r" if self.removeDups else ""

    sample_ids = [self.sample]
    if self.pair is not None:
        sample_ids.append(self.pair)

    template = """
        sambamba markdup \\
            {out}/bam/{sid}.bam \\
            {work}/{sid}.marked.bam \\
            -p --overflow-list-size 600000 \\
            --tmpdir {scratch} \\
            -t {threads} {remove}
        rm -rf {scratch}
        cp {work}/{sid}.marked.bam {out}/bam/{sid}.bam
        cp {work}/{sid}.marked.bam.bai {out}/bam/{sid}.bam.bai
    """

    for sid in sample_ids:
        work_dir = out_dir + "/tempFile/markDups_" + sid
        mkdir(work_dir)
        scratch_dir = work_dir + "/tmp"
        mkdir(scratch_dir)

        cmd = template.format(out=out_dir, sid=sid, work=work_dir,
                              scratch=scratch_dir, threads=n_threads,
                              remove=remove_flag)
        print(cmd)
        os.system(cmd)
def varscan_filter(self):
    """Merge the VarScan2 indel and SNV calls into one sorted VCF.

    Reads ``<sample>.indel.fix.vcf`` and ``<sample>.snp.fix.vcf`` from
    ``<output>/tempFile/varscan2_<sample>/``.  Header lines are taken from the
    indel file only; in them ``NORMAL<TAB>TUMOR`` is replaced by
    ``<sample><TAB><pair>``.  Every data record from both files keeps its
    first nine columns and has columns 10 and 11 swapped so the data matches
    the renamed header.  The merged file is sorted with ``bcftools sort`` and
    copied to ``<output>/vcf/<sample>_<pair>.vcf``.
    """
    resultsDir = self.output
    sampleID = self.sample
    pairID = self.pair
    tmpDir = resultsDir + "/tempFile/varscan2_" + sampleID
    outputFile = tmpDir + "/" + sampleID + ".merge.vcf"

    def swap_sample_columns(record):
        """Return *record* with the two sample columns exchanged."""
        fields = record.rstrip("\n").split("\t")
        return "\t".join(fields[0:9] + [fields[10], fields[9]]) + "\n"

    # Context managers guarantee all three handles are closed even if a
    # malformed record raises part-way through.
    with open(outputFile, "w") as output, \
            open(tmpDir + "/" + sampleID + ".indel.fix.vcf", "r") as indel, \
            open(tmpDir + "/" + sampleID + ".snp.fix.vcf", "r") as snp:
        for line in indel:
            if line.startswith("#"):
                # Only the column-header line contains this pair of names.
                if "NORMAL\tTUMOR" in line:
                    line = line.replace("NORMAL\tTUMOR", sampleID + "\t" + pairID)
            elif not line.strip():
                # Blank lines carry no record; skip instead of crashing.
                continue
            else:
                line = swap_sample_columns(line)
            output.write(line)

        # The SNV file contributes records only; its header is dropped.
        for line in snp:
            if line.startswith("#") or not line.strip():
                continue
            output.write(swap_sample_columns(line))

    tmp = tmpDir + "/tmp"
    mkdir(tmp)
    cmd = """
        bcftools sort {outputFile} -O v -o {tmpDir}/{sampleID}.vcf -T {tmp}
        cp {tmpDir}/{sampleID}.vcf {resultsDir}/vcf/{sampleID}_{pairID}.vcf
    """.format(resultsDir=resultsDir, outputFile=outputFile, tmpDir=tmpDir,
               sampleID=sampleID, pairID=pairID, tmp=tmp)
    print(cmd)
    os.system(cmd)
def gemini(self):
    """Run GeminiMulti on the sample BAM and, if configured, the paired BAM.

    Each ``<output>/bam/<id>.bam`` is processed with the genome folder from
    ``self.databases``; output goes to ``<output>/tempFile/gemini_<id>/``.
    """
    gemini_multi = "/home/bioinfo/ubuntu/software/GeminiMulti_5.2.10.49/GeminiMulti.dll"
    genome_dir = self.databases
    out_dir = self.output
    n_threads = self.threads

    sample_ids = [self.sample]
    if self.pair is not None:
        sample_ids.append(self.pair)

    template = """
        dotnet {gemini_multi} -bam {out}/bam/{sid}.bam \\
            -genome {databases} \\
            --outFolder {work} \\
            --numprocesses {threads} \\
            --samtools /home/bioinfo/ubuntu/software/samtools-1.11
    """

    for sid in sample_ids:
        work_dir = out_dir + "/tempFile/gemini_" + sid
        mkdir(work_dir)
        cmd = template.format(gemini_multi=gemini_multi, out=out_dir, sid=sid,
                              databases=genome_dir, work=work_dir,
                              threads=n_threads)
        print(cmd)
        os.system(cmd)
def gatk_filter(self):
    """Estimate contamination and filter the Mutect2 calls with GATK.

    Uses ``<sample>.BQSR.bam`` from ``<output>/bam`` when that file is listed
    there, otherwise ``<sample>.bam``.  Runs ``GetPileupSummaries``,
    ``CalculateContamination`` and ``FilterMutectCalls`` on
    ``<work>/<sample>.m2.vcf``, keeps records whose FILTER is ``PASS``,
    ``clustered_events`` or ``slippage`` via ``bcftools view -f`` and copies
    the result to ``<output>/vcf/<sample>.vcf``.  ``<work>`` is
    ``<output>/tempFile/gatk_<sample>``.
    """
    exac_name = self.runningInfo["setting"]["Mutation"]["gatk_filter"]["small_exac"]
    small_exac = self.databases + "/" + exac_name
    bed = self.bed
    reference = self.reference
    out_dir = self.output
    sample = self.sample

    # Prefer the recalibrated BAM when it has been produced.
    bam_name = sample + ".BQSR.bam"
    if bam_name not in os.listdir(out_dir + "/bam"):
        bam_name = sample + ".bam"

    work_dir = out_dir + "/tempFile/gatk_" + sample
    mkdir(work_dir)

    template = """
        gatk GetPileupSummaries \\
            -I {out}/bam/{bam} \\
            -O {work}/{sample}.pileups.table \\
            -V {exac} \\
            -L {bed} \\
            -R {ref}
        gatk CalculateContamination \\
            -I {work}/{sample}.pileups.table \\
            -O {work}/{sample}.contamination.table
        gatk FilterMutectCalls \\
            -R {ref} \\
            -V {work}/{sample}.m2.vcf \\
            -O {work}/{sample}.m2.contFiltered.vcf \\
            --contamination-table {work}/{sample}.contamination.table
        bcftools view \\
            {work}/{sample}.m2.contFiltered.vcf \\
            -f PASS,clustered_events,slippage \\
            > {work}/{sample}.filter.vcf
        cp {work}/{sample}.filter.vcf {out}/vcf/{sample}.vcf
    """
    cmd = template.format(out=out_dir, bam=bam_name, work=work_dir,
                          sample=sample, exac=small_exac, bed=bed,
                          ref=reference)
    print(cmd)
    os.system(cmd)
def hlascan(self):
    """Run HLAscan for HLA-A, HLA-B and HLA-C and concatenate the reports.

    The working directory is ``<output>/tempFile/hlascan_<sample>`` (stored on
    ``self.tmpDir`` before ``self.extractHLA()`` runs).  The paired sample's
    extracted FASTQ files are used when a pair is configured, otherwise the
    sample's.  The three per-gene reports are concatenated into
    ``<id>.hlascan.txt``, which is copied into ``<output>/HLA/``.
    """
    out_dir = self.output
    sample = self.sample
    pair = self.pair
    n_threads = self.threads
    hla_scan = "/home/bioinfo/ubuntu/software/HLAscan/hla_scan_r_v2.1.4"
    hla_db = "/home/bioinfo/ubuntu/software/HLAscan/HLA-ALL.IMGT"

    work_dir = out_dir + "/tempFile/hlascan_" + sample
    self.tmpDir = work_dir
    mkdir(work_dir)
    self.extractHLA()

    # Type the paired sample when there is one.
    hla_id = sample if pair is None else pair

    # Message: "Starting HLA typing".
    print("开始进行HLA分型")

    genes = ("HLA-A", "HLA-B", "HLA-C")
    per_gene = (
        "{scan} -l {work}/{hla_id}.HLA.R1.fastq \\\n"
        "    -r {work}/{hla_id}.HLA.R2.fastq \\\n"
        "    -t {threads} \\\n"
        "    -d {db} -g {gene} > {work}/{hla_id}.{gene}.txt"
    )
    steps = [
        per_gene.format(scan=hla_scan, work=work_dir, hla_id=hla_id,
                        threads=n_threads, db=hla_db, gene=gene)
        for gene in genes
    ]
    gene_reports = " ".join(
        "{}/{}.{}.txt".format(work_dir, hla_id, gene) for gene in genes
    )
    steps.append(
        "cat {} \\\n    > {}/{}.hlascan.txt".format(gene_reports, work_dir, hla_id)
    )
    steps.append("cp {}/{}.hlascan.txt {}/HLA/".format(work_dir, hla_id, out_dir))

    cmd = "\n" + "\n".join(steps) + "\n"
    print(cmd)
    os.system(cmd)
def __init__(self, runningInfo):
    """Store run settings, resolve the BAM paths and create output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``pair``, ``rawdata``, ``output``, ``process.threads``,
            ``process.Mutation.SV``, ``setting.Mapping.reference``,
            ``setting.Mutation.Bed`` and ``setting.QC.UMI_loc``.

    ``self.bam`` (and ``self.normal`` when a pair is configured) point at the
    ``.sort.bam`` files left in ``<output>/tempFile/bwa_<id>/`` when
    ``UMI_loc`` is set and the sample's sorted BAM exists; otherwise they
    point at ``<output>/bam/<id>.bam``.
    """
    self.runningInfo = runningInfo
    self.sample = runningInfo["sample"]
    self.pair = runningInfo["pair"]
    self.rawdata = runningInfo["rawdata"]
    self.output = runningInfo["output"]
    self.threads = str(runningInfo["process"]["threads"])
    self.runApp = runningInfo["process"]["Mutation"]["SV"]
    self.reference = runningInfo["setting"]["Mapping"]["reference"]
    self.bed = runningInfo["setting"]["Mutation"]["Bed"]

    def sorted_bam(sample_id):
        """Path of the sorted BAM kept in the bwa temp folder of *sample_id*."""
        return self.output + "/tempFile/bwa_" + sample_id + "/" + sample_id + ".sort.bam"

    def published_bam(sample_id):
        """Path of the BAM published under ``<output>/bam``."""
        return self.output + "/bam/" + sample_id + ".bam"

    use_sorted = (
        runningInfo["setting"]["QC"]["UMI_loc"] is not None
        and os.path.exists(sorted_bam(self.sample))
    )
    locate = sorted_bam if use_sorted else published_bam

    self.bam = locate(self.sample)
    if self.pair is not None:
        # Bug fix: the normal BAM used to be looked up under the pair's
        # folder but with the *sample's* file name, a path that the
        # alignment step never writes.
        self.normal = locate(self.pair)

    mkdir(self.output)
    mkdir(self.output + "/tempFile")
    mkdir(self.output + "/Fusion")
def fastqc(self):
    """Run FastQC on the raw paired-end FASTQ files.

    Reads ``<rawdata>/<id>_R{1,2}.fastq.gz`` for the sample and, when
    configured, the paired sample; reports go to
    ``<output>/tempFile/fastqc_<id>/``.
    """
    raw_dir = self.rawdata
    out_dir = self.output
    n_threads = self.threads

    sample_ids = [self.sample]
    if self.pair is not None:
        sample_ids.append(self.pair)

    # Create every report directory up front, before any FastQC run.
    report_dirs = []
    for sid in sample_ids:
        report_dir = out_dir + "/tempFile/fastqc_" + sid
        mkdir(report_dir)
        report_dirs.append(report_dir)

    template = """
        fastqc \\
            {raw}/{sid}_R1.fastq.gz \\
            {raw}/{sid}_R2.fastq.gz \\
            -t {threads} -o {dest}
    """

    for sid, report_dir in zip(sample_ids, report_dirs):
        cmd = template.format(raw=raw_dir, sid=sid, threads=n_threads,
                              dest=report_dir)
        print(cmd)
        os.system(cmd)
def __init__(self, runningInfo):
    """Store run settings and create the HLA/LOH output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``pair``, ``rawdata``, ``output``, ``process.threads`` and
            ``setting.Annotation.buildver``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "pair", "rawdata", "output"):
        setattr(self, key, runningInfo[key])
    self.threads = str(runningInfo["process"]["threads"])
    self.buildver = runningInfo["setting"]["Annotation"]["buildver"]

    # "" creates the output root itself.
    for sub_dir in ("", "/tempFile", "/HLA", "/LOH"):
        mkdir(self.output + sub_dir)
def factera(self):
    """Run FACTERA on the sample BAM.

    Output is written to ``<output>/tempFile/factera_<sample>/``.
    """
    factera = "/home/bioinfo/ubuntu/software/factera/factera.pl"
    out_dir = self.output
    sample = self.sample
    n_threads = self.threads

    work_dir = out_dir + "/tempFile/factera_" + sample
    mkdir(work_dir)

    template = """
        {factera} -F -p {threads} \\
            -o {work} \\
            {out}/bam/{sample}.bam \\
            {exon_bed} \\
            {two_bit}
    """
    # NOTE(review): `exonBed` and `referenceTwoBit` are not assigned anywhere
    # in this method; unless they exist at module level this raises
    # NameError.  Confirm where these paths are meant to come from.
    cmd = template.format(factera=factera, threads=n_threads, work=work_dir,
                          out=out_dir, sample=sample, exon_bed=exonBed,
                          two_bit=referenceTwoBit)
    print(cmd)
    os.system(cmd)
def __init__(self, runningInfo):
    """Store run settings and create the QC output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``pair``, ``rawdata``, ``output``, ``process.threads``,
            ``process.QC``, ``setting.QC.UMI_loc`` and ``setting.QC.UMI_len``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "pair", "rawdata", "output"):
        setattr(self, key, runningInfo[key])

    process_cfg = runningInfo["process"]
    self.threads = str(process_cfg["threads"])
    self.runApp = process_cfg["QC"]

    qc_cfg = runningInfo["setting"]["QC"]
    self.UMI_loc = qc_cfg["UMI_loc"]
    self.UMI_len = qc_cfg["UMI_len"]

    # "" creates the output root itself.
    for sub_dir in ("", "/QC", "/cleandata", "/tempFile"):
        mkdir(self.output + sub_dir)
def gatk_haplotypecaller(self):
    """Call variants on the sample BAM with GATK HaplotypeCaller.

    Writes ``<output>/tempFile/HaplotypeCaller_<sample>/<sample>.htc.vcf`` and
    copies it to ``<output>/vcf/<sample>.vcf``.  When no BED file is
    configured the literal string ``null`` is passed to ``-L``.
    """
    reference = self.reference
    out_dir = self.output
    sample = self.sample
    n_threads = self.threads

    work_dir = out_dir + "/tempFile/HaplotypeCaller_" + sample
    mkdir(work_dir)

    # Fall back to the literal "null" when there is no interval file.
    intervals = "null" if self.bed is None else self.bed

    template = """
        gatk HaplotypeCaller \\
            -R {ref} \\
            -I {out}/bam/{sample}.bam \\
            -O {work}/{sample}.htc.vcf \\
            -L {bed} \\
            --native-pair-hmm-threads {threads}
        cp {work}/{sample}.htc.vcf {out}/vcf/{sample}.vcf
    """
    cmd = template.format(ref=reference, out=out_dir, sample=sample,
                          work=work_dir, bed=intervals, threads=n_threads)
    print(cmd)
    os.system(cmd)
def __init__(self, runningInfo):
    """Store run settings and create the annotation output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``pair``, ``rawdata``, ``output`` and ``process.threads``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "pair", "rawdata", "output"):
        setattr(self, key, runningInfo[key])
    self.threads = str(runningInfo["process"]["threads"])

    # "" creates the output root itself.
    for sub_dir in ("", "/tempFile", "/annotation"):
        mkdir(self.output + sub_dir)
def __init__(self, runningInfo):
    """Store run settings and create the TMB output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``rawdata``, ``output``, ``process.Other.TMB``,
            ``setting.Other.PanelSize`` and ``setting.Annotation.buildver``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "rawdata", "output"):
        setattr(self, key, runningInfo[key])

    self.runApp = runningInfo["process"]["Other"]["TMB"]

    settings = runningInfo["setting"]
    self.panelSize = settings["Other"]["PanelSize"]
    self.buildver = settings["Annotation"]["buildver"]

    # "" creates the output root itself.
    for sub_dir in ("", "/tempFile", "/TMB"):
        mkdir(self.output + sub_dir)
def __init__(self, runningInfo):
    """Store run settings and create the MSI output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``rawdata``, ``pair``, ``output``, ``process.threads`` and
            ``process.Other.MSI``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "rawdata", "pair", "output"):
        setattr(self, key, runningInfo[key])

    process_cfg = runningInfo["process"]
    self.threads = str(process_cfg["threads"])
    self.runApp = process_cfg["Other"]["MSI"]

    # "" creates the output root itself.
    for sub_dir in ("", "/tempFile", "/msi"):
        mkdir(self.output + sub_dir)
def gencore(self):
    """Run gencore on the published BAMs and replace them with the result.

    For the sample, and then for the paired sample when one is configured,
    ``<output>/bam/<id>.bam`` is processed by ``gencore`` into
    ``<output>/tempFile/gencore_<id>/<id>.umi.bam`` (with JSON and HTML
    reports), sorted with sambamba, and the sorted BAM plus its ``.bai`` are
    copied back over ``<output>/bam/<id>.bam(.bai)``.
    """
    resultsDir = self.output
    threads = self.threads
    reference = self.reference

    sample_ids = [self.sample]
    if self.pair is not None:
        sample_ids.append(self.pair)

    template = """
        gencore -i {resultsDir}/bam/{sid}.bam \\
            -r {reference} \\
            -o {work}/{sid}.umi.bam \\
            -u UMI -s 2 -d 1 \\
            -j {work}/{sid}.json -h {work}/{sid}.html
        sambamba sort -t {threads} {work}/{sid}.umi.bam -o {work}/{sid}.umi.sort.bam --tmpdir {tmp} -p
        cp {work}/{sid}.umi.sort.bam {resultsDir}/bam/{sid}.bam
        cp {work}/{sid}.umi.sort.bam.bai {resultsDir}/bam/{sid}.bam.bai
        rm -rf {tmp}
    """

    for sid in sample_ids:
        work_dir = resultsDir + "/tempFile/gencore_" + sid
        mkdir(work_dir)
        # Each run gets its own scratch directory.  The paired run used to
        # reuse the sample's scratch path, which the sample's command had
        # already deleted with `rm -rf`.
        tmp = work_dir + "/tmp"
        mkdir(tmp)

        cmd = template.format(resultsDir=resultsDir, sid=sid,
                              reference=reference, work=work_dir,
                              threads=threads, tmp=tmp)
        print(cmd)
        os.system(cmd)
def __init__(self, runningInfo):
    """Store run settings and create the CNV output folders.

    Args:
        runningInfo: nested run configuration.  Keys read here: ``sample``,
            ``pair``, ``rawdata``, ``output``,
            ``setting.Mapping.reference``, ``process.threads`` and
            ``process.Mutation.CNV``.
    """
    self.runningInfo = runningInfo
    for key in ("sample", "pair", "rawdata", "output"):
        setattr(self, key, runningInfo[key])
    self.reference = runningInfo["setting"]["Mapping"]["reference"]

    process_cfg = runningInfo["process"]
    self.threads = str(process_cfg["threads"])
    self.runApp = process_cfg["Mutation"]["CNV"]

    # "" creates the output root itself.
    for sub_dir in ("", "/tempFile", "/cnv"):
        mkdir(self.output + sub_dir)