Ejemplo n.º 1
0
    def work_script(self):
        """Build the bash script that selects MNP and MIXED records with GATK.

        The script sources ``jre-8u92`` and ``gatk-3.6.0``, runs
        ``SelectVariants`` on ``self.input().path`` against ``self.reference``
        and writes ``<output>.temp.vcf.gz``; that file is renamed to
        ``self.output().path`` as the final step.

        Returns:
            str: The rendered script text.
        """
        # ``gatk`` is a command template defined outside this method; its
        # ``mem`` field is filled with self.mem * self.n_cpu.
        gatk_cmd = gatk.format(mem=self.mem * self.n_cpu)

        # The literal is not raw, so Python drops every backslash-newline pair
        # and bash sees the whole SelectVariants call on one line.  ``-T`` must
        # appear exactly once, directly followed by the tool name.
        return '''#!/bin/bash
                  source jre-8u92
                  source gatk-3.6.0
                  gatk='{gatk}'
                  set -eo pipefail

                  $gatk -T SelectVariants -V {input} -R {reference} \
                        --selectTypeToInclude MNP \
                        --selectTypeToInclude MIXED \
                        --out {output}.temp.vcf.gz

                  mv {output}.temp.vcf.gz {output}
                  '''.format(input=self.input().path,
                             output=self.output().path,
                             reference=self.reference,
                             gatk=gatk_cmd)
Ejemplo n.º 2
0
    def work_script(self):
        """Render the bash script that runs GATK GenotypeGVCFs.

        ``self.input()[0].path`` is passed as the ``-L`` intervals and every
        element of ``self.input()[1]`` contributes one ``--variant <path>``
        argument.  The tool writes ``<output>.temp.vcf.gz``, which the script
        then renames to ``self.output().path``.

        Returns:
            str: The rendered script text.
        """
        # One " --variant <path>" per input, glued with backslash + newline so
        # bash treats the multi-line argument list as a single command.
        continuation = "\\\n"
        variant_args = continuation.join(
            " --variant " + gvcf.path for gvcf in self.input()[1])

        # ``gatk`` is a command template defined outside this method.
        gatk_cmd = gatk.format(mem=self.mem * self.n_cpu)

        template = '''#!/bin/bash
                source jre-8u92
                source gatk-3.6.0
                gatk='{gatk}'

                set -eo pipefail
                $gatk -T GenotypeGVCFs -R {reference} -L {intervals} -o {output}.temp.vcf.gz --includeNonVariantSites {variants}

                mv {output}.temp.vcf.gz {output}
                '''
        return template.format(
            gatk=gatk_cmd,
            reference=self.reference,
            intervals=self.input()[0].path,
            output=self.output().path,
            variants=variant_args,
        )
Ejemplo n.º 3
0
    def work_script(self):
        """Render the bash script that runs GATK HaplotypeCaller in GVCF mode.

        The tool reads ``self.input().path`` against ``self.reference`` and
        writes ``<output>.temp.g.vcf``, which the script then renames to
        ``self.output().path``.

        Returns:
            str: The rendered script text.
        """
        # NOTE(review): this template sources only jre-8u92, whereas the other
        # GATK scripts visible alongside it also source gatk-3.6.0 -- confirm
        # that the omission is intentional.
        #
        # The literal is not raw, so Python removes each backslash-newline
        # pair; the leading spaces of the following line keep the arguments
        # separated in the single resulting command line.
        template = '''#!/bin/bash
                source jre-8u92
                gatk='{gatk}'
                set -euo pipefail

                $gatk -T HaplotypeCaller  \
                      -R {reference} \
                      -I {input} \
                      -dontUseSoftClippedBases\
                      --emitRefConfidence GVCF \
                      -o {output}.temp.g.vcf

                mv {output}.temp.g.vcf {output}
        '''
        fields = {
            'input': self.input().path,
            'output': self.output().path,
            # ``gatk`` is a command template defined outside this method.
            'gatk': gatk.format(mem=self.mem * self.n_cpu),
            'reference': self.reference,
        }
        return template.format(**fields)
Ejemplo n.º 4
0
    def work_script(self):
        """Render the bash script that indexes the input and splits N-CIGAR reads.

        The script first runs Picard ``BuildBamIndex`` on ``self.input().path``
        and then GATK ``SplitNCigarReads``, writing ``<output>.temp``.  Both
        ``<output>.temp.bai`` and ``<output>.temp`` are renamed to their final
        names afterwards.

        Returns:
            str: The rendered script text.
        """
        # ``picard`` and ``gatk`` are command templates defined outside this
        # method; both receive the same ``mem`` expression.
        picard_cmd = picard.format(mem=self.mem * self.n_cpu)
        gatk_cmd = gatk.format(mem=self.mem * self.n_cpu)

        template = '''#!/bin/bash
               source jre-8u92
               source gatk-3.6.0
               gatk='{gatk}'
               picard='{picard}'
               set -euo pipefail

               $picard BuildBamIndex VERBOSITY=ERROR QUIET=true I={input}
               $gatk -T SplitNCigarReads --logging_level ERROR -R {reference} -I {input} -o {output}.temp -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS

               mv {output}.temp.bai {output}.bai
               mv {output}.temp {output}
                '''
        return template.format(
            gatk=gatk_cmd,
            picard=picard_cmd,
            reference=self.reference,
            input=self.input().path,
            output=self.output().path,
        )
Ejemplo n.º 5
0
    def work_script(self):
        """Render the bash script that recalibrates base qualities with GATK.

        The script runs ``BaseRecalibrator`` to produce a recalibration table
        at ``<base_dir>/<VERSION>/<PIPELINE>/<library>/<library>_recal.tsv``
        and then ``PrintReads -BQSR`` with that table, writing
        ``<output>.temp`` which is finally renamed to ``self.output().path``.

        Returns:
            str: The rendered script text.
        """
        recal = os.path.join(self.base_dir, VERSION, PIPELINE, self.library,
                             self.library + "_recal.tsv")

        # The template references {snp_db}; without a matching keyword
        # str.format raises KeyError('snp_db') before any script is produced.
        # NOTE(review): the known-sites path is assumed to live on the task as
        # ``self.snp_db`` (mirroring ``self.reference``) -- confirm the
        # attribute name against the enclosing class.
        return '''#!/bin/bash
                  source jre-8u92
                  source gatk-3.6.0
                  gatk='{gatk}'
                  set -euo pipefail

                  $gatk -T BaseRecalibrator  -R {reference}  -I {input}  -knownSites {snp_db}  -o {recal}
                  $gatk -T PrintReads -R {reference} -I {input} -BQSR {recal} -o {output}.temp

                  mv {output}.temp {output}
                '''.format(gatk=gatk.format(mem=self.mem * self.n_cpu),
                           input=self.input().path,
                           output=self.output().path,
                           reference=self.reference,
                           snp_db=self.snp_db,
                           recal=recal)
Ejemplo n.º 6
0
    def work_script(self):
        """Render the bash script that keeps SNP and NO_VARIATION records.

        GATK ``SelectVariants`` writes ``<output>.temp.vcf.gz``.  That file is
        then decompressed, lines containing a comma or tab immediately
        followed by an asterisk are dropped, and the result is bgzipped to
        ``<output>.temp2.vcf.gz``, which is renamed to ``self.output().path``.

        NOTE(review): the intermediate ``<output>.temp.vcf.gz`` is not removed
        by the script.

        Returns:
            str: The rendered script text.
        """
        # ``gatk`` is a command template defined outside this method.
        gatk_cmd = gatk.format(mem=self.mem * self.n_cpu)

        # Escapes inside this non-raw literal:
        #   * backslash-newline pairs are dropped by Python, so the
        #     SelectVariants call reaches bash as one line;
        #   * "\t" becomes a literal tab inside the grep bracket expression;
        #   * "\\*" yields backslash-star, i.e. a literal asterisk for grep.
        #     Writing it as a bare backslash-star is an invalid Python escape
        #     sequence and triggers a warning at compile time.
        return '''#!/bin/bash
                  source jre-8u92
                  source gatk-3.6.0
                  source vcftools-0.1.13;
                  gatk='{gatk}'
                  set -eo pipefail

                  $gatk -T SelectVariants -V {input} -R {reference} \
                        --selectTypeToInclude NO_VARIATION \
                        --selectTypeToInclude SNP \
                        --out {output}.temp.vcf.gz

                  # Filter out * which represents spanning deletions
                  gzip -cd {output}.temp.vcf.gz | grep -v $'[,\t]\\*' | bgzip -c > {output}.temp2.vcf.gz

                  mv {output}.temp2.vcf.gz {output}
                  '''.format(input=self.input().path,
                             output=self.output().path,
                             reference=self.reference,
                             gatk=gatk_cmd)
Ejemplo n.º 7
0
    def work_script(self):
        """Render the bash script that merges one chunk of gVCFs with GATK.

        ``self.input()`` is cut into chunks of
        ``ceil(len(self.input()) / self.N_gvcfs)`` items.  Chunk ``self.idx``
        is selected; when ``self.idx`` is not below ``self.N_gvcfs - 1`` the
        chunk runs to the end of the input list.  The selection is stored on
        ``self.variants`` and each element becomes a ``--variant <path>``
        argument of ``CombineGVCFs``.  The tool writes ``<output>.temp``, which
        the script then renames to ``self.output().path``.

        Returns:
            str: The rendered script text.
        """
        chunk_size = math.ceil(len(self.input()) / self.N_gvcfs)
        first = chunk_size * self.idx
        stop = chunk_size * (self.idx + 1)

        if self.idx < self.N_gvcfs - 1:
            self.variants = self.input()[first:stop]
        else:
            # Last chunk: take whatever remains.
            self.variants = self.input()[first:]

        # Backslash + newline between arguments keeps the multi-line list a
        # single bash command.
        continuation = "\\\n"
        variant_args = continuation.join(
            " --variant " + gvcf.path for gvcf in self.variants)

        template = '''#!/bin/bash
                source jre-8u92
                source gatk-3.6.0
                gatk='{gatk}'

                set -eo pipefail
                $gatk -T CombineGVCFs -R {reference} -o {output}.temp {variants}

                mv {output}.temp {output}
                '''
        return template.format(
            # ``gatk`` is a command template defined outside this method.
            gatk=gatk.format(mem=self.mem * self.n_cpu),
            reference=self.reference,
            output=self.output().path,
            variants=variant_args,
        )