def work_script(self):
    """Return the bash script that extracts MNP and MIXED records with GATK.

    The script sources ``jre-8u92`` and ``gatk-3.6.0``, runs
    ``SelectVariants`` on the task's input VCF against ``self.reference``,
    writes the result to ``<output>.temp.vcf.gz`` and renames it to the
    final output path as its last step.  Because ``set -eo pipefail`` is
    active, the rename is not reached when GATK exits non-zero.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    # One argument per entry, joined with single spaces so the command is
    # one shell line and needs no continuation backslashes.
    select_variants = " ".join([
        # The analysis type flag must appear exactly once; the previous
        # template emitted "-T -T SelectVariants".
        "$gatk -T SelectVariants",
        "-V {input}",
        "-R {reference}",
        "--selectTypeToInclude MNP",
        "--selectTypeToInclude MIXED",
        "--out {output}.temp.vcf.gz",
    ])
    template = "\n".join([
        "#!/bin/bash",
        "source jre-8u92",
        "source gatk-3.6.0",
        "gatk='{gatk}'",
        "",
        "set -eo pipefail",
        select_variants,
        "",
        "mv {output}.temp.vcf.gz {output}",
        "",
    ])
    return template.format(
        input=self.input().path,
        output=self.output().path,
        reference=self.reference,
        gatk=gatk.format(mem=self.mem * self.n_cpu),
    )
def work_script(self):
    """Return the bash script that jointly genotypes a set of gVCFs.

    ``self.input()[0]`` supplies the interval file passed to ``-L`` and
    every target in ``self.input()[1]`` becomes one ``--variant`` argument.
    The ``--variant`` arguments are separated by a backslash-newline, so the
    GATK command continues across several lines of the generated script.
    GATK writes to ``<output>.temp.vcf.gz``, which is renamed to the final
    output path as the last step.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    inputs = self.input()
    # Backslash + newline between entries is a shell line continuation.
    variant_args = "\\\n".join(
        " --variant " + gvcf.path for gvcf in inputs[1]
    )
    genotype_cmd = (
        "$gatk -T GenotypeGVCFs -R {reference} -L {intervals} "
        "-o {output}.temp.vcf.gz --includeNonVariantSites {variants}"
    )
    script_lines = (
        "#!/bin/bash",
        "source jre-8u92",
        "source gatk-3.6.0",
        "gatk='{gatk}'",
        "",
        "set -eo pipefail",
        genotype_cmd,
        "",
        "mv {output}.temp.vcf.gz {output}",
        "",
    )
    return "\n".join(script_lines).format(
        output=self.output().path,
        intervals=inputs[0].path,
        gatk=gatk.format(mem=self.mem * self.n_cpu),
        reference=self.reference,
        variants=variant_args,
    )
def work_script(self):
    """Return the bash script that calls variants into a gVCF.

    The script runs GATK ``HaplotypeCaller`` on the task's input against
    ``self.reference`` with ``-dontUseSoftClippedBases`` and
    ``--emitRefConfidence GVCF``.  Output goes to ``<output>.temp.g.vcf``
    and is renamed to the final output path as the last step; with
    ``set -euo pipefail`` active the rename is not reached when GATK
    exits non-zero.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    # Arguments are joined with explicit single spaces.  The previous
    # template wrote "-dontUseSoftClippedBases\" with the continuation
    # backslash glued to the flag; Python drops backslash-newline inside a
    # non-raw string, so the flag was separated from the next argument only
    # by whatever indentation the following source line happened to have.
    haplotype_caller = " ".join([
        "$gatk -T HaplotypeCaller",
        "-R {reference}",
        "-I {input}",
        "-dontUseSoftClippedBases",
        "--emitRefConfidence GVCF",
        "-o {output}.temp.g.vcf",
    ])
    template = "\n".join([
        "#!/bin/bash",
        "source jre-8u92",
        # NOTE(review): added for consistency -- every other GATK script in
        # this file sources gatk-3.6.0 before invoking $gatk.  Confirm this
        # step is meant to use the same environment.
        "source gatk-3.6.0",
        "gatk='{gatk}'",
        "",
        "set -euo pipefail",
        haplotype_caller,
        "",
        "mv {output}.temp.g.vcf {output}",
        "",
    ])
    return template.format(
        input=self.input().path,
        output=self.output().path,
        gatk=gatk.format(mem=self.mem * self.n_cpu),
        reference=self.reference,
    )
def work_script(self):
    """Return the bash script that indexes the input and splits N-cigar reads.

    Two commands are generated: Picard ``BuildBamIndex`` on the task's
    input, then GATK ``SplitNCigarReads`` (with the
    ``ReassignOneMappingQuality`` read filter mapping quality 255 to 60 and
    ``-U ALLOW_N_CIGAR_READS``).  GATK writes to ``<output>.temp``; the
    script then renames ``<output>.temp.bai`` to ``<output>.bai`` and
    ``<output>.temp`` to the final output path.

    Both command lines come from the module-level ``gatk`` and ``picard``
    templates, each formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    heap = self.mem * self.n_cpu
    values = {
        "input": self.input().path,
        "output": self.output().path,
        "picard": picard.format(mem=heap),
        "gatk": gatk.format(mem=heap),
        "reference": self.reference,
    }
    template = (
        "#!/bin/bash\n"
        "source jre-8u92\n"
        "source gatk-3.6.0\n"
        "gatk='{gatk}'\n"
        "picard='{picard}'\n"
        "\n"
        "set -euo pipefail\n"
        "$picard BuildBamIndex VERBOSITY=ERROR QUIET=true I={input}\n"
        "$gatk -T SplitNCigarReads --logging_level ERROR"
        " -R {reference} -I {input} -o {output}.temp"
        " -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60"
        " -U ALLOW_N_CIGAR_READS\n"
        "\n"
        # The index is moved first so the final output path is the last
        # file to appear.
        "mv {output}.temp.bai {output}.bai\n"
        "mv {output}.temp {output}\n"
    )
    return template.format(**values)
def work_script(self):
    """Return the bash script that recalibrates base qualities.

    Two GATK steps are generated.  ``BaseRecalibrator`` writes a
    recalibration table to
    ``<base_dir>/<VERSION>/<PIPELINE>/<library>/<library>_recal.tsv``;
    ``PrintReads`` then applies that table via ``-BQSR`` and writes to
    ``<output>.temp``, which is renamed to the final output path as the
    last step.

    The previous template referenced ``{snp_db}`` for ``-knownSites`` but
    never passed it to ``str.format``, so building the script always raised
    ``KeyError``.  The value is now taken from ``self.snp_db``.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.

    Raises:
        AttributeError: If the task has no ``snp_db`` attribute.
    """
    recal = os.path.join(
        self.base_dir,
        VERSION,
        PIPELINE,
        self.library,
        self.library + "_recal.tsv",
    )
    # NOTE(review): ``self.snp_db`` is assumed to hold the known-sites file,
    # mirroring how ``self.reference`` is supplied -- confirm the attribute
    # name against the task's declared parameters.
    known_sites = self.snp_db

    template = "\n".join([
        "#!/bin/bash",
        "source jre-8u92",
        "source gatk-3.6.0",
        "gatk='{gatk}'",
        "",
        "set -euo pipefail",
        "$gatk -T BaseRecalibrator -R {reference} -I {input}"
        " -knownSites {snp_db} -o {recal}",
        "$gatk -T PrintReads -R {reference} -I {input}"
        " -BQSR {recal} -o {output}.temp",
        "",
        "mv {output}.temp {output}",
        "",
    ])
    return template.format(
        gatk=gatk.format(mem=self.mem * self.n_cpu),
        input=self.input().path,
        output=self.output().path,
        reference=self.reference,
        snp_db=known_sites,
        recal=recal,
    )
def work_script(self):
    """Return the bash script that keeps SNP and non-variant records.

    GATK ``SelectVariants`` first writes the ``NO_VARIATION`` and ``SNP``
    records of the task's input to ``<output>.temp.vcf.gz``.  That file is
    then decompressed, passed through ``grep -v`` to drop every line that
    has a ``*`` directly after a comma or a TAB, re-compressed with
    ``bgzip`` into ``<output>.temp2.vcf.gz`` and finally renamed to the
    output path.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    values = {
        "input": self.input().path,
        "output": self.output().path,
        "reference": self.reference,
        "gatk": gatk.format(mem=self.mem * self.n_cpu),
    }
    select_cmd = (
        "$gatk -T SelectVariants -V {input} -R {reference} "
        "--selectTypeToInclude NO_VARIATION "
        "--selectTypeToInclude SNP "
        "--out {output}.temp.vcf.gz"
    )
    # "\t" puts a literal TAB into the script and "\\*" a backslash-star,
    # i.e. the shell sees the pattern $'[,<TAB>]\*'.
    filter_cmd = (
        "gzip -cd {output}.temp.vcf.gz"
        " | grep -v $'[,\t]\\*'"
        " | bgzip -c > {output}.temp2.vcf.gz"
    )
    script_lines = [
        "#!/bin/bash",
        "source jre-8u92",
        "source gatk-3.6.0",
        "source vcftools-0.1.13;",
        "gatk='{gatk}'",
        "",
        "set -eo pipefail",
        select_cmd,
        "",
        "# Filter out * which represents spanning deletions",
        filter_cmd,
        "",
        "mv {output}.temp2.vcf.gz {output}",
        "",
    ]
    return "\n".join(script_lines).format(**values)
def work_script(self):
    """Return the bash script that combines one chunk of gVCFs.

    ``self.input()`` is divided into chunks of
    ``ceil(len(self.input()) / self.N_gvcfs)`` entries and the chunk
    numbered ``self.idx`` is selected; when ``self.idx`` is not below
    ``self.N_gvcfs - 1`` the slice runs to the end of the list.  The
    selection is stored on ``self.variants`` as a side effect.

    Each selected target becomes one ``--variant`` argument of GATK
    ``CombineGVCFs``, separated by backslash-newline so the command
    continues across lines.  GATK writes to ``<output>.temp``, which is
    renamed to the final output path as the last step.

    The GATK command line comes from the module-level ``gatk`` template,
    formatted with ``mem = self.mem * self.n_cpu``.

    Returns:
        str: The complete bash script text.
    """
    chunk_size = math.ceil(len(self.input()) / self.N_gvcfs)
    first = chunk_size * self.idx
    last = chunk_size * (self.idx + 1)

    if self.idx < self.N_gvcfs - 1:
        self.variants = self.input()[first:last]
    else:
        # Last chunk: take everything from ``first`` onwards.
        self.variants = self.input()[first:]

    # Backslash + newline between entries is a shell line continuation.
    variant_args = "\\\n".join(
        " --variant " + gvcf.path for gvcf in self.variants
    )
    script_lines = (
        "#!/bin/bash",
        "source jre-8u92",
        "source gatk-3.6.0",
        "gatk='{gatk}'",
        "",
        "set -eo pipefail",
        "$gatk -T CombineGVCFs -R {reference} -o {output}.temp {variants}",
        "",
        "mv {output}.temp {output}",
        "",
    )
    return "\n".join(script_lines).format(
        output=self.output().path,
        gatk=gatk.format(mem=self.mem * self.n_cpu),
        reference=self.reference,
        variants=variant_args,
    )