Ejemplo n.º 1
0
    def snp_effect(self):
        """
        Variant effect annotation with SnpEff.

        SnpEff annotates and predicts the effects of variants on genes (such as
        amino acid changes). This step builds one concatenated job, named
        "compute_effects.allSamples", made of three commands:

        1. `mkdir -p variants`
        2. snpeff.compute_effects (split=True) reading
           variants/allSamples.vt.vcf.gz and writing
           variants/allSamples.vt.snpeff.vcf
        3. htslib.bgzip_tabix_vcf reading that annotated file and writing
           variants/allSamples.vt.snpeff.vcf.gz

        Returns a list containing that single job.
        """
        variants_dir = "variants"
        prefix = os.path.join(variants_dir, "allSamples")

        # Every file handled by this step shares the same path prefix.
        normalized_vcf = prefix + ".vt.vcf.gz"
        annotated_vcf = prefix + ".vt.snpeff.vcf"
        annotated_vcf_gz = prefix + ".vt.snpeff.vcf.gz"

        make_dir = Job(command="mkdir -p " + variants_dir)
        annotate = snpeff.compute_effects(normalized_vcf, annotated_vcf, split=True)
        compress = htslib.bgzip_tabix_vcf(annotated_vcf, annotated_vcf_gz)

        combined = concat_jobs(
            [make_dir, annotate, compress],
            name="compute_effects.allSamples"
        )
        return [combined]
Ejemplo n.º 2
0
    def snp_effect(self):
        """
        Annotate the all-samples VCF for variant effects using SnpEff.

        SnpEff annotates and predicts the effects of variants on genes (such as
        amino acid changes). The returned list holds a single concatenated job
        ("compute_effects.allSamples") that creates the "variants" directory,
        calls snpeff.compute_effects with split=True on
        variants/allSamples.vt.vcf.gz, and hands the resulting
        variants/allSamples.vt.snpeff.vcf to htslib.bgzip_tabix_vcf, whose
        output is variants/allSamples.vt.snpeff.vcf.gz.
        """
        out_dir = "variants"
        base = os.path.join(out_dir, "allSamples")
        snpeff_vcf = base + ".vt.snpeff.vcf"

        # Sub-jobs are collected in the order they must run.
        steps = [Job(command="mkdir -p " + out_dir)]
        steps.append(
            snpeff.compute_effects(base + ".vt.vcf.gz", snpeff_vcf, split=True)
        )
        steps.append(htslib.bgzip_tabix_vcf(snpeff_vcf, snpeff_vcf + ".gz"))

        jobs = []
        jobs.append(concat_jobs(steps, name="compute_effects.allSamples"))
        return jobs
Ejemplo n.º 3
0
    def call_variants(self):
        """
        VarScan caller for insertions and deletions.

        The number of jobs is read from the 'varscan' / 'nb_jobs' config
        parameter. The sample names are written, one per line, to
        'varscan_samples.tsv' in the current working directory while the job
        list is being built (not when the jobs run).

        * nb_jobs == 1: one "varscan.single" job that creates
          variants/rawVarScan and pipes samtools.mpileup into
          varscan.mpileupcns and htslib.bgzip_tabix_vcf, with
          variants/allSamples.vcf.gz as output.
        * nb_jobs > 1: a "varscan.genome.beds" job (mkdir + tools.dict2beds on
          the configured genome dictionary), then one "varscan.<idx>" pipe job
          per BED file, and finally a "gatk_cat_varscan" job that passes the
          per-index VCFs to gatk.cat_variants with
          variants/allSamples.vcf.gz as output.

        Returns the list of jobs in creation order.
        """

        jobs = []

        nb_jobs = config.param('varscan', 'nb_jobs', type='posint')
        if nb_jobs > 50:
            log.warning(
                "Number of VarScan jobs is > 50. This is usually much. Anything beyond 20 can be problematic."
            )

        variants_directory = os.path.join("variants")
        varscan_directory = os.path.join(variants_directory, "rawVarScan")

        # One BED path per job index; only used when nb_jobs > 1.
        beds = [
            os.path.join(varscan_directory, 'chrs.' + str(idx) + '.bed')
            for idx in range(nb_jobs)
        ]

        genome_dictionary = config.param('DEFAULT',
                                         'genome_dictionary',
                                         type='filepath')

        if nb_jobs > 1:
            bedJob = tools.dict2beds(genome_dictionary, beds)
            jobs.append(
                concat_jobs(
                    [Job(command="mkdir -p " + varscan_directory), bedJob],
                    name="varscan.genome.beds"))

        bams = []
        # Bound up front so an empty sample list no longer raises NameError in
        # the single-job branch below.
        # NOTE(review): assumes samtools.mpileup accepts regionFile=None - confirm.
        bedfile = None
        sampleNamesFile = 'varscan_samples.tsv'

        # The context manager guarantees the sample-name file is flushed and
        # closed before any job referencing it is returned.
        with open(sampleNamesFile, 'w') as sample_names:
            for sample in self.samples:
                alignment_directory = os.path.join("alignment", sample.name)
                bam = os.path.join(alignment_directory,
                                   sample.name + ".matefixed.sorted.bam")
                bams.append(bam)
                sample_names.write("%s\n" % sample.name)
                # Overwritten on every iteration: only the BED resolved from
                # the last sample's first readset is used afterwards.
                bedfile = bvatools.resolve_readset_coverage_bed(sample.readsets[0])

        if nb_jobs == 1:
            job = concat_jobs([
                Job(command="mkdir -p " + varscan_directory,
                    samples=self.samples),
                pipe_jobs([
                    samtools.mpileup(bams,
                                     None,
                                     config.param('varscan',
                                                  'mpileup_other_options'),
                                     regionFile=bedfile),
                    varscan.mpileupcns(
                        None, None, sampleNamesFile,
                        config.param('varscan', 'other_options')),
                    htslib.bgzip_tabix_vcf(
                        None,
                        os.path.join(variants_directory, "allSamples.vcf.gz"))
                ])
            ],
                              name="varscan.single")

            jobs.append(job)

        else:
            output_vcfs = []
            for idx in range(nb_jobs):
                output_vcf = os.path.join(varscan_directory,
                                          "allSamples." + str(idx) + ".vcf.gz")
                varScanJob = pipe_jobs([
                    samtools.mpileup(bams,
                                     None,
                                     config.param('varscan',
                                                  'mpileup_other_options'),
                                     regionFile=beds[idx]),
                    varscan.mpileupcns(
                        None, None, sampleNamesFile,
                        config.param('varscan', 'other_options')),
                    htslib.bgzip_tabix_vcf(None, output_vcf)
                ],
                                       name="varscan." + str(idx))
                varScanJob.samples = self.samples
                output_vcfs.append(output_vcf)
                jobs.append(varScanJob)

            job = gatk.cat_variants(
                output_vcfs,
                os.path.join(variants_directory, "allSamples.vcf.gz"))
            job.name = "gatk_cat_varscan"
            job.samples = self.samples
            jobs.append(job)
        return jobs
Ejemplo n.º 4
0
    def call_variants(self):
        """
        VarScan caller for insertions and deletions.

        The number of jobs is read from the 'varscan' / 'nb_jobs' config
        parameter. The sample names are written, one per line, to
        'varscan_samples.tsv' in the current working directory while the job
        list is being built (not when the jobs run).

        * nb_jobs == 1: one "varscan.single" job that creates
          variants/rawVarScan and pipes samtools.mpileup into
          varscan.mpileupcns and htslib.bgzip_tabix_vcf, with
          variants/allSamples.vcf.gz as output.
        * nb_jobs > 1: a "varscan.genome.beds" job (mkdir + tools.dict2beds on
          the configured genome dictionary), then one "varscan.<idx>" pipe job
          per BED file, and finally a "gatk_cat_varscan" job that passes the
          per-index VCFs to gatk.cat_variants with
          variants/allSamples.vcf.gz as output.

        Returns the list of jobs in creation order.
        """

        jobs = []

        nb_jobs = config.param('varscan', 'nb_jobs', type='posint')
        if nb_jobs > 50:
            log.warning("Number of VarScan jobs is > 50. This is usually much. Anything beyond 20 can be problematic.")

        variants_directory = os.path.join("variants")
        varscan_directory = os.path.join(variants_directory, "rawVarScan")

        # One BED path per job index; only used when nb_jobs > 1.
        beds = [
            os.path.join(varscan_directory, 'chrs.' + str(idx) + '.bed')
            for idx in range(nb_jobs)
        ]

        genome_dictionary = config.param('DEFAULT', 'genome_dictionary', type='filepath')

        if nb_jobs > 1:
            # The original referenced an undefined `mkdir_job` here; build the
            # same "mkdir -p" job the single-job branch uses so the BED files
            # have a directory to land in.
            mkdir_job = Job(command="mkdir -p " + varscan_directory)
            bedJob = tools.dict2beds(genome_dictionary, beds)
            jobs.append(concat_jobs([mkdir_job, bedJob], name="varscan.genome.beds"))

        bams = []
        # Bound up front so an empty sample list no longer raises NameError in
        # the single-job branch below.
        # NOTE(review): assumes samtools.mpileup accepts regionFile=None - confirm.
        bedfile = None
        sampleNamesFile = 'varscan_samples.tsv'

        # The context manager guarantees the sample-name file is flushed and
        # closed before any job referencing it is returned.
        with open(sampleNamesFile, 'w') as sample_names:
            for sample in self.samples:
                alignment_directory = os.path.join("alignment", sample.name)
                bam = os.path.join(alignment_directory, sample.name + ".matefixed.sorted.bam")
                bams.append(bam)
                sample_names.write("%s\n" % sample.name)
                # Overwritten on every iteration: only the BED resolved from
                # the last sample's first readset is used afterwards.
                bedfile = bvatools.resolve_readset_coverage_bed(sample.readsets[0])

        if nb_jobs == 1:
            job = concat_jobs([
                Job(command="mkdir -p " + varscan_directory),
                pipe_jobs([
                    samtools.mpileup(bams, None, config.param('varscan', 'mpileup_other_options'), regionFile=bedfile),
                    varscan.mpileupcns(None, None, sampleNamesFile, config.param('varscan', 'other_options')),
                    htslib.bgzip_tabix_vcf(None, os.path.join(variants_directory, "allSamples.vcf.gz"))
                ])
            ], name="varscan.single")

            jobs.append(job)

        else:
            output_vcfs = []
            for idx in range(nb_jobs):
                output_vcf = os.path.join(varscan_directory, "allSamples." + str(idx) + ".vcf.gz")
                varScanJob = pipe_jobs([
                    samtools.mpileup(bams, None, config.param('varscan', 'mpileup_other_options'), regionFile=beds[idx]),
                    varscan.mpileupcns(None, None, sampleNamesFile, config.param('varscan', 'other_options')),
                    htslib.bgzip_tabix_vcf(None, output_vcf)
                ], name="varscan." + str(idx))
                output_vcfs.append(output_vcf)
                jobs.append(varScanJob)

            job = gatk.cat_variants(output_vcfs, os.path.join(variants_directory, "allSamples.vcf.gz"))
            job.name = "gatk_cat_varscan"
            jobs.append(job)
        return jobs