Example #1
0
 def rare_variants_famseq(self, selected_vcf, vcf_out):
     """Run FamSeq on a VCF to call rare variants using pedigree data.

     selected_vcf -- path substituted after ``-vcfFile``.
     vcf_out -- path substituted after ``-output``.

     The pedigree file is taken from ``self.famseq_ped_file``.
     """
     # Shape of the resulting call, e.g.:
     #   FamSeq vcf -vcfFile test.vcf -pedFile fam01.ped -output test.FamSeq.vcf
     template = "FamSeq vcf -vcfFile {selected_vcf} -pedFile {ped_file} -output {vcf_out}"
     fields = {
         "selected_vcf": selected_vcf,
         "ped_file": self.famseq_ped_file,
         "vcf_out": vcf_out,
     }
     run_stage(self.state, "rare_variants_famseq", template.format(**fields))
Example #2
0
    def concatenate_vcfs(self, vcf_files_in, vcf_out):
        '''Merge VCF files into one compressed VCF with ``bcftools merge``.

        vcf_files_in -- sequence of input VCF paths.
        vcf_out -- path of the merged output.

        With more than 200 inputs, the files are merged in batches of 200
        into indexed temporary files, which are then merged into ``vcf_out``.
        Everything is submitted to ``run_stage`` as one shell command string.
        '''
        batch_size = 200

        if len(vcf_files_in) > batch_size:
            # str.rstrip('.vcf') removes any trailing run of the characters
            # '.', 'v', 'c' and 'f' (e.g. 'mycf.vcf' -> 'my'), which mangles
            # the temporary file names. Remove the literal suffix instead.
            suffix = '.vcf'
            if vcf_out.endswith(suffix):
                base = vcf_out[:-len(suffix)]
            else:
                base = vcf_out

            merge_commands = []
            temp_merge_outputs = []
            for start in range(0, len(vcf_files_in), batch_size):
                batch = vcf_files_in[start:start + batch_size]
                temp_merge_filename = base + ".temp_{start}.vcf".format(
                    start=start)
                # Each batch is merged and then indexed so that it can be
                # used as an input of the final merge.
                batch_command = 'bcftools merge -O z -o {vcf_out} {join_vcf_files} && bcftools index -t -f {vcf_out}; '.format(
                    vcf_out=temp_merge_filename,
                    join_vcf_files=' '.join(batch))
                merge_commands.append(batch_command)
                temp_merge_outputs.append(temp_merge_filename)

            final_merge = 'bcftools merge -O z -o {vcf_out} {join_vcf_files} '.format(
                vcf_out=vcf_out,
                join_vcf_files=' '.join(temp_merge_outputs))
            merge_commands.append(final_merge)
            final_command = ''.join(merge_commands)
        else:
            final_command = 'bcftools merge -O z -o {vcf_out} {vcf_files} '.format(
                vcf_out=vcf_out, vcf_files=' '.join(vcf_files_in))

        run_stage(self.state, 'concatenate_vcfs', final_command)
Example #3
0
 def clip_bam(self, bam_in, sorted_bam_out):
     '''Run Bamclipper on a BAM file.

     bam_in -- path passed to the tool via ``-b``.
     sorted_bam_out -- not referenced when the command is built.

     The executable and the primer BEDPE file come from ``self.bamclipper``
     and ``self.primer_bedpe_file``.
     '''
     template = '{bamclipper} -b {bam_in} -p {primer_bedpe_file} -n 1'
     fields = {
         'bamclipper': self.bamclipper,
         'bam_in': bam_in,
         'primer_bedpe_file': self.primer_bedpe_file,
     }
     run_stage(self.state, 'clip_bam', template.format(**fields))
Example #4
0
 def extract_split_read_alignments(self, bam_in, splitters_bam_out):
     '''Pipe a BAM through extractSplitReads_BwaMem and write the result.

     bam_in -- BAM read with ``samtools view -h``.
     splitters_bam_out -- file receiving the redirected BAM output.
     '''
     pipeline_steps = [
         'samtools view -h {input_bam}',
         'extractSplitReads_BwaMem -i stdin',
         'samtools view -Sb - > {output_bam}',
     ]
     template = ' | '.join(pipeline_steps)
     command = template.format(input_bam=bam_in, output_bam=splitters_bam_out)
     run_stage(self.state, 'extract_split_read_alignments', command)
Example #5
0
    def apply_undr_rover(self, inputs, vcf_output, sample_id):
        '''Run undr_rover on a pair of FASTQ files.

        inputs -- pair ``(read1_fastq, read2_fastq)``.
        vcf_output -- path passed via ``--out``.
        sample_id -- prefix of the ``<sample_id>.coverage`` file written
            under ``variants/undr_rover/coverdir``.
        '''
        read1, read2 = inputs
        # The 'cores' option is looked up but does not appear in the command.
        cores = self.get_stage_options('apply_undr_rover', 'cores')
        coverage_dir = "variants/undr_rover/coverdir"
        safe_make_dir('variants/undr_rover')
        safe_make_dir('variants/undr_rover/coverdir')
        coverage_name = sample_id + ".coverage"

        arguments = [
            'undr_rover',
            '--primer_coords {coord_file}',
            '--primer_sequences {primer_file}',
            '--reference {reference}',
            '--out {vcf_output}',
            '--coverfile {coverdir}/{coverfile}',
            '--proportionthresh {proportionthresh}',
            '--absthresh {absthresh}',
            '--max_variants {maxvariants}',
            '{fastq_read1} {fastq_read2}',
        ]
        fields = {
            'coord_file': self.coord_file,
            'primer_file': self.primer_file,
            'reference': self.reference,
            'vcf_output': vcf_output,
            'coverdir': coverage_dir,
            'proportionthresh': self.proportionthresh,
            'absthresh': self.absthresh,
            'maxvariants': self.maxvariants,
            'coverfile': coverage_name,
            'fastq_read1': read1,
            'fastq_read2': read2,
        }
        run_stage(self.state, 'apply_undr_rover',
                  ' '.join(arguments).format(**fields))
Example #6
0
 def apply_cat_vcf(self, inputs, vcf_out):
     '''Concatenate, sort and bgzip a set of VCF files.

     inputs -- iterable of VCF paths handed to ``vcf-concat``.
     vcf_out -- file receiving the bgzip-compressed result.
     '''
     template = 'vcf-concat {vcfs} | vcf-sort -c | bgzip -c > {vcf_out} '
     joined_inputs = ' '.join(inputs)
     run_stage(self.state, 'apply_cat_vcf',
               template.format(vcfs=joined_inputs, vcf_out=vcf_out))
Example #7
0
 def vt_decompose_normalise(self, vcf_in, vcf_out):
     '''Pipe a VCF through ``vt decompose -s`` and ``vt normalize``.

     vcf_in -- VCF given to ``vt decompose``.
     vcf_out -- path passed to ``vt normalize`` via ``-o``.

     The reference passed via ``-r`` is ``self.reference``.
     '''
     decompose = "vt decompose -s {vcf_in}"
     normalise = "vt normalize -r {reference} -o {vcf_out} -"
     template = decompose + " | " + normalise
     command = template.format(vcf_in=vcf_in,
                               reference=self.reference,
                               vcf_out=vcf_out)
     run_stage(self.state, 'vt_decompose_normalise', command)
Example #8
0
    def apply_undr_rover(self, input, vcf_output, sample_id):
        '''Run undr_rover (``--fast --snvthresh 10``) on a FASTQ pair.

        input -- string whose slice ``[11:-20]`` is used as the shared stem
            of the two FASTQ names under ``fastqs/``.
        vcf_output -- path passed via ``--out``.
        sample_id -- prefix of the ``<sample_id>.coverage`` file.

        When "QC" occurs in the read-1 path, the QC primer and interval
        files are used instead of the default ones.
        '''
        stem = input[11:-20]
        read1 = 'fastqs/' + stem + '_R1_001.fastq.gz'
        read2 = 'fastqs/' + stem + '_R2_001.fastq.gz'
        coverage_dir = "variants/undr_rover/coverdir"
        coverage_name = sample_id + ".coverage"

        is_qc = "QC" in read1
        primers = self.primer_file_QC if is_qc else self.primer_file_default
        intervals = self.interval_file_QC if is_qc else self.interval_file

        arguments = [
            'undr_rover',
            '--primer_coords {coord_file}',
            '--primer_sequences {primer_file}',
            '--reference {reference}',
            '--out {vcf_output}',
            '--coverfile {coverdir}/{coverfile}',
            '--proportionthresh {proportionthresh}',
            '--absthresh {absthresh}',
            '--max_variants {maxvariants}',
            '--fast --snvthresh 10',
            '{fastq_read1} {fastq_read2}',
        ]
        fields = {
            'coord_file': intervals,
            'primer_file': primers,
            'reference': self.reference,
            'vcf_output': vcf_output,
            'coverdir': coverage_dir,
            'proportionthresh': self.proportionthresh,
            'absthresh': self.absthresh,
            'maxvariants': self.maxvariants,
            'coverfile': coverage_name,
            'fastq_read1': read1,
            'fastq_read2': read2,
        }
        run_stage(self.state, 'apply_undr_rover',
                  ' '.join(arguments).format(**fields))
Example #9
0
    def align_bwa(self, inputs, bam_out, sample_id, lib):
        '''Align a FASTQ pair with bwa mem, clip primers, filter, sort, index.

        inputs -- pair ``(read1_fastq, read2_fastq)``.
        bam_out -- path of the sorted BAM, which is then indexed.
        sample_id -- value used for both ID and SM in the read group.
        lib -- not referenced; the read group carries a fixed ``PU:lib1``.

        When "QC" occurs in the read-1 path, the QC primer BEDPE file is
        used instead of the default one.
        '''
        read1, read2 = inputs
        n_cores = self.get_stage_options('align_bwa', 'cores')
        # Raw string: bwa receives the two characters backslash + 't'.
        rg_template = r'"@RG\tID:{sample}\tSM:{sample}\tPU:lib1\tPL:Illumina"'
        read_group = rg_template.format(sample=sample_id)

        bedpe = (self.primer_bedpe_file_QC if "QC" in read1
                 else self.primer_bedpe_file_default)

        pipeline_steps = [
            'bwa mem -M -t {cores} -R {read_group} {reference} {fastq_read1} {fastq_read2}',
            '{bamclipper} -i -p {primer_bedpe_file} -n {cores}',
            'samtools view -u -h -q 1 -f 2 -F 4 -F 8 -F 256 -',
            'samtools sort -@ {cores} -o {bam}; samtools index {bam}',
        ]
        command = ' | '.join(pipeline_steps).format(
            cores=n_cores,
            read_group=read_group,
            fastq_read1=read1,
            fastq_read2=read2,
            reference=self.reference,
            bamclipper=self.bamclipper,
            primer_bedpe_file=bedpe,
            bam=bam_out)
        run_stage(self.state, 'align_bwa', command)
Example #10
0
    def combine_gvcf_gatk(self, vcf_files_in, vcf_out):
        '''Combine G.VCF files for all samples using GATK CombineGVCFs.

        vcf_files_in -- sequence of input G.VCF paths.
        vcf_out -- path of the final combined output.

        Inputs are combined in batches of 200 into temporary files, which
        are then combined into ``vcf_out``. All invocations are joined with
        ';' and submitted to ``run_stage`` as one command string.
        '''
        batch_size = 200
        # The memory setting does not change between batches: read it once.
        mem = self.state.config.get_stage_options('combine_gvcf_gatk', 'mem')
        template = ("java -Xmx{mem}g -jar {jar_path} -T CombineGVCFs -R {reference} "
                    "--disable_auto_index_creation_and_locking_when_reading_rods "
                    "{g_vcf_files} -o {vcf_out}")

        # str.rstrip('.vcf') removes any trailing run of the characters
        # '.', 'v', 'c' and 'f' (e.g. 'mycf.vcf' -> 'my'), which mangles the
        # temporary file names. Remove the literal suffix instead.
        suffix = '.vcf'
        if vcf_out.endswith(suffix):
            base = vcf_out[:-len(suffix)]
        else:
            base = vcf_out

        merge_commands = []
        temp_merge_outputs = []
        for start in range(0, len(vcf_files_in), batch_size):
            batch = vcf_files_in[start:start + batch_size]
            temp_merge_filename = base + ".temp_{start}.vcf".format(start=start)
            batch_command = template.format(
                reference=self.reference,
                jar_path=self.gatk_jar,
                mem=mem,
                g_vcf_files=' '.join('--variant ' + vcf for vcf in batch),
                vcf_out=temp_merge_filename)
            merge_commands.append(batch_command + "; ")
            temp_merge_outputs.append(temp_merge_filename)

        final_command_part = template.format(
            reference=self.reference,
            jar_path=self.gatk_jar,
            mem=mem,
            g_vcf_files=' '.join(
                '--variant ' + vcf for vcf in temp_merge_outputs),
            vcf_out=vcf_out)
        merge_commands.append(final_command_part)

        run_stage(self.state, 'combine_gvcf_gatk', ''.join(merge_commands))
Example #11
0
 def apply_bcftools(self, mpileup_in, vcf_out):
     '''Run ``bcftools call -vmO v`` on the given input.

     mpileup_in -- input file given to ``bcftools call``.
     vcf_out -- path passed via ``-o``.
     '''
     template = 'bcftools call -vmO v -o {vcf_out} {mpileup_in}'
     run_stage(self.state, 'apply_bcftools',
               template.format(vcf_out=vcf_out, mpileup_in=mpileup_in))
Example #12
0
 def extract_split_read_alignments(self, bam_in, splitters_bam_out):
     '''Pipe a BAM through extractSplitReads_BwaMem and write the result.

     bam_in -- BAM read with ``samtools view -h``.
     splitters_bam_out -- file receiving the redirected BAM output.
     '''
     read_step = 'samtools view -h {input_bam}'
     extract_step = 'extractSplitReads_BwaMem -i stdin'
     write_step = 'samtools view -Sb - > {output_bam}'
     template = ' | '.join((read_step, extract_step, write_step))
     run_stage(self.state, 'extract_split_read_alignments',
               template.format(input_bam=bam_in,
                               output_bam=splitters_bam_out))
Example #13
0
 def target_coverage_bamutil_interval(self, bam_in, coverage_out):
     '''Run ``bam stats --basic`` restricted to a region list.

     bam_in -- BAM passed via ``--in``.
     coverage_out -- file receiving the tool output through ``&>``.

     The region list is ``self.fragment_bed``.
     '''
     template = 'bam stats --basic --in {bam_in} --regionList {fragment_bed} &> {coverage_out}'
     fields = {
         'bam_in': bam_in,
         'fragment_bed': self.fragment_bed,
         'coverage_out': coverage_out,
     }
     run_stage(self.state, 'target_coverage_bamutil_interval',
               template.format(**fields))
Example #14
0
 def extract_genes_bedtools(self, bam_in, bam_out):
     '''Intersect a BAM with the configured BED file using bedtools.

     bam_in -- BAM passed via ``-abam``.
     bam_out -- file receiving the redirected output.

     The BED file is the 'bed' option of the 'extract_genes_bedtools' stage.
     '''
     stage = 'extract_genes_bedtools'
     regions = self.state.config.get_stage_option(stage, 'bed')
     template = 'bedtools intersect -abam {bam_in} -b {bed_file} > {bam_out}'
     run_stage(self.state, 'extract_genes_bedtools',
               template.format(bam_in=bam_in, bed_file=regions,
                               bam_out=bam_out))
Example #15
0
 def fastq_to_fasta(self, fastq_in, fasta_out):
     '''Decompress a FASTQ with zcat and convert it with fastq_to_fasta.

     fastq_in -- input read through ``zcat``.
     fasta_out -- path passed via ``-o``.
     '''
     # Flags: -n keeps reads containing 'N' (unknown) bases, which would
     # otherwise be discarded; -Q33 selects Illumina quality scores.
     template = 'zcat {fastq_in} | fastq_to_fasta -n -Q33 -o {fasta_out}'
     fields = {'fastq_in': fastq_in, 'fasta_out': fasta_out}
     run_stage(self.state, 'fastq_to_fasta', template.format(**fields))
Example #16
0
 def apply_bcf(self, inputs, vcf_out):
     '''Run ``bcftools filter`` excluding records matching ALT='*'.

     inputs -- input VCF path.
     vcf_out -- file receiving the redirected output.
     '''
     # The 'cores' option is looked up but has no placeholder in the command.
     cores = self.get_stage_options('apply_bcf', 'cores')
     template = 'bcftools filter -e "ALT=\'*\'" {vcf_in} > {vcf_out}'
     run_stage(self.state, 'apply_bcf',
               template.format(vcf_in=inputs, vcf_out=vcf_out))
Example #17
0
 def apply_vep(self, inputs, vcf_out):
     '''Annotate a VCF with VEP in offline/cache mode.

     inputs -- input VCF path (``-i``).
     vcf_out -- output VCF path (``-o``).

     Cache directory, reference FASTA and the custom/plugin data paths are
     read from attributes of ``self``; ``--fork`` uses the stage's 'cores'
     option.
     '''
     n_forks = self.get_stage_options('apply_vep', 'cores')
     template = (
         "vep --cache --dir_cache {other_vep} "
         "--assembly GRCh37 --refseq --offline "
         "--fasta {reference} "
         "--sift b --polyphen b --symbol --numbers --biotype --total_length --hgvs --format vcf "
         "--vcf --force_overwrite --flag_pick --no_stats "
         "--custom {brcaexpath},brcaex,vcf,exact,0,Clinical_significance_ENIGMA,"
         "Comment_on_clinical_significance_ENIGMA,Date_last_evaluated_ENIGMA,"
         "Pathogenicity_expert,HGVS_cDNA,HGVS_Protein,BIC_Nomenclature "
         "--custom {gnomadpath},gnomAD,vcf,exact,0,AF_NFE,AN_NFE "
         "--custom {revelpath},RVL,vcf,exact,0,REVEL_SCORE "
         "--plugin MaxEntScan,{maxentscanpath} "
         "--plugin ExAC,{exacpath},AC,AN "
         "--plugin dbNSFP,{dbnsfppath},REVEL_score,REVEL_rankscore "
         "--plugin dbscSNV,{dbscsnvpath} "
         "--plugin CADD,{caddpath} "
         "--fork {cores} "
         "-i {vcf_in} "
         "-o {vcf_out}"
     )
     fields = {
         'other_vep': self.other_vep,
         'cores': n_forks,
         'vcf_out': vcf_out,
         'vcf_in': inputs,
         'reference': self.reference,
         'brcaexpath': self.brcaex,
         'gnomadpath': self.gnomad,
         'revelpath': self.revel,
         'maxentscanpath': self.maxentscan,
         'exacpath': self.exac,
         'dbnsfppath': self.dbnsfp,
         'dbscsnvpath': self.dbscsnv,
         'caddpath': self.cadd,
     }
     run_stage(self.state, 'apply_vep', template.format(**fields))
Example #18
0
 def genotype_svtyper(self, inputs, vcf_out):
     '''Run SVTyper on a VCF with its sample and splitter BAMs.

     inputs -- ``(vcf_in, [sample_bam, splitters_bam])``.
     vcf_out -- path passed via ``-o``.
     '''
     vcf_in, (sample_bam, splitters_bam) = inputs
     arguments = [
         'svtyper',
         '-B {sample_bam}',
         '-S {splitters_bam}',
         '-i {vcf_in}',
         '-o {vcf_out}',
     ]
     command = ' '.join(arguments).format(
         sample_bam=sample_bam, splitters_bam=splitters_bam,
         vcf_in=vcf_in, vcf_out=vcf_out)
     run_stage(self.state, 'genotype_svtyper', command)
Example #19
0
 def fastq_to_fasta(self, fastq_in, fasta_out):
     '''Decompress a FASTQ with zcat and convert it with fastq_to_fasta.

     fastq_in -- input read through ``zcat``.
     fasta_out -- path passed via ``-o``.
     '''
     # Flags: -n keeps reads containing 'N' (unknown) bases, which would
     # otherwise be discarded; -Q33 selects Illumina quality scores.
     decompress = 'zcat {fastq_in}'
     convert = 'fastq_to_fasta -n -Q33 -o {fasta_out}'
     template = decompress + ' | ' + convert
     run_stage(self.state, 'fastq_to_fasta',
               template.format(fastq_in=fastq_in, fasta_out=fasta_out))
Example #20
0
 def filter_stats(self, txt_in, txt_out):
     '''Derive the 'passed' list from a summary file using awk.

     txt_in -- summary file read by awk.
     txt_out -- file receiving one ``<col1>.sorted.locatit.bam`` per
         accepted row.
     '''
     # Intended rule: pass samples with >= 80% of target covered at 10X or
     # more. The threshold on column 8 is 0 for now so that every row passes.
     awk_program = '{if($8 >= 0){print $1".sorted.locatit.bam"}}'
     template = "awk '{awk_comm}' {summary_file} > {final_file}"
     fields = {
         'awk_comm': awk_program,
         'summary_file': txt_in,
         'final_file': txt_out,
     }
     run_stage(self.state, 'filter_stats', template.format(**fields))
Example #21
0
 def structural_variants_lumpy(self, inputs, vcf_out):
     '''Run lumpyexpress on a sample BAM with its splitter/discordant BAMs.

     inputs -- ``(sample_bam, [splitters_bam, discordants_bam])``.
     vcf_out -- path passed via ``-o``.
     '''
     sample_bam, (splitters_bam, discordants_bam) = inputs
     arguments = [
         'lumpyexpress',
         '-B {sample_bam}',
         '-S {splitters_bam}',
         '-D {discordants_bam}',
         '-o {vcf}',
     ]
     command = ' '.join(arguments).format(
         sample_bam=sample_bam, splitters_bam=splitters_bam,
         discordants_bam=discordants_bam, vcf=vcf_out)
     run_stage(self.state, 'structural_variants_lumpy', command)
Example #22
0
 def apply_multicov(self, bam_in, multicov):
     '''Run ``bedtools multicov`` over a set of BAM files.

     bam_in -- iterable of BAM paths passed via ``-bams``.
     multicov -- file receiving the redirected output.

     The BED file is ``self.target_bed``; the 'coverage' directory is
     created before the command is run.
     '''
     joined_bams = ' '.join(bam_in)
     safe_make_dir('coverage')
     template = 'bedtools multicov -bams {bams} -bed {target_bed} > {multicov}'
     fields = {
         'bams': joined_bams,
         'target_bed': self.target_bed,
         'multicov': multicov,
     }
     run_stage(self.state, 'apply_multicov', template.format(**fields))
Example #23
0
 def apply_summarize_picard(self, input, output):
     '''Run ``coverage_summary.py`` and redirect its output to a file.

     input -- not referenced when the command is built.
     output -- file receiving the script's standard output.
     '''
     template = 'python coverage_summary.py > {output} '
     run_stage(self.state, 'apply_summarize_picard',
               template.format(output=output))
Example #24
0
 def rare_variants_famseq(self, selected_vcf, vcf_out):
     '''Run FamSeq (``-method 2``) on a VCF using pedigree data.

     selected_vcf -- path substituted after ``-vcfFile``.
     vcf_out -- path substituted after ``-output``.

     The pedigree file is ``self.famseq_ped_file``. A fixed FamSeq source
     directory is prepended to PATH before the tool is invoked.
     '''
     # FamSeq methods - 1: Bayesian network; 2: Elston-Stewart algorithm;
     # 3: MCMC
     path_setup = "PATH=/vlsci/LSC0007/shared/jessica_testing/software/FamSeq/src/:$PATH ; "
     invocation = "FamSeq vcf -method 2 -vcfFile {selected_vcf} -pedFile {ped_file} -output {vcf_out}"
     template = path_setup + invocation
     command = template.format(selected_vcf=selected_vcf,
                               ped_file=self.famseq_ped_file,
                               vcf_out=vcf_out)
     run_stage(self.state, 'rare_variants_famseq', command)
Example #25
0
 def structural_variants_lumpy(self, inputs, vcf_out):
     '''Run lumpyexpress on a sample BAM with its splitter/discordant BAMs.

     inputs -- ``(sample_bam, [splitters_bam, discordants_bam])``.
     vcf_out -- path passed via ``-o``.
     '''
     sample_bam, (splitters_bam, discordants_bam) = inputs
     template = ('lumpyexpress -B {sample_bam} -S {splitters_bam} '
                 '-D {discordants_bam} -o {vcf}')
     fields = {
         'sample_bam': sample_bam,
         'splitters_bam': splitters_bam,
         'discordants_bam': discordants_bam,
         'vcf': vcf_out,
     }
     run_stage(self.state, 'structural_variants_lumpy',
               template.format(**fields))
Example #26
0
 def generate_amplicon_metrics(self, bam_in, txt_out, sample):
     '''Write per-interval ``bedtools coverage`` output tagged with a sample.

     bam_in -- BAM passed via ``-b``.
     txt_out -- file receiving the redirected output.
     sample -- text appended (after a tab) to every output line by sed.

     Intervals (``-a``) come from ``self.interval_file``.
     '''
     coverage_step = 'bedtools coverage -f 5E-1 -a {bed_intervals} -b {bam_in}'
     # "\t" is a real tab character inside the sed expression.
     tag_step = 'sed "s/$/\t{sample}/g" > {txt_out}'
     template = coverage_step + ' | ' + tag_step
     fields = {
         'bed_intervals': self.interval_file,
         'bam_in': bam_in,
         'sample': sample,
         'txt_out': txt_out,
     }
     run_stage(self.state, 'generate_amplicon_metrics',
               template.format(**fields))
Example #27
0
 def translocations_delly(self, bams_in, vcf_out):
     '''Call translocations (``-t TRA``) with delly.

     bams_in -- iterable of BAM paths appended to the command.
     vcf_out -- path passed via ``-o``.

     Thread count and exclusion file are the 'cores' and 'exclude' options
     of the 'structural_variants_delly' stage, which is also the stage name
     given to ``run_stage``.
     '''
     stage = 'structural_variants_delly'
     joined_bams = ' '.join(bams_in)
     n_threads = self.state.config.get_stage_option(stage, 'cores')
     exclusions = self.state.config.get_stage_option(stage, 'exclude')
     template = 'OMP_NUM_THREADS={threads} delly -t TRA -x {exclude} -o {vcf_out} -g {reference} {bams}'
     fields = {
         'threads': n_threads,
         'exclude': exclusions,
         'vcf_out': vcf_out,
         'reference': self.reference,
         'bams': joined_bams,
     }
     run_stage(self.state, stage, template.format(**fields))
Example #28
0
 def genotype_svtyper(self, inputs, vcf_out):
     '''Run SVTyper on a VCF with its sample and splitter BAMs.

     inputs -- ``(vcf_in, [sample_bam, splitters_bam])``.
     vcf_out -- path passed via ``-o``.
     '''
     vcf_in, (sample_bam, splitters_bam) = inputs
     template = ('svtyper -B {sample_bam} -S {splitters_bam} '
                 '-i {vcf_in} -o {vcf_out}')
     fields = {
         'sample_bam': sample_bam,
         'splitters_bam': splitters_bam,
         'vcf_in': vcf_in,
         'vcf_out': vcf_out,
     }
     run_stage(self.state, 'genotype_svtyper', template.format(**fields))
Example #29
0
 def apply_samtools_mpileup(self, bam_in, mpileup_out_bcf):
     '''Run ``samtools mpileup`` over a set of BAM files.

     bam_in -- iterable of BAM paths appended to the command.
     mpileup_out_bcf -- path passed via ``-go``.

     The reference (``-f``) is ``self.reference``; the 'variants' directory
     is created before the command is run.
     '''
     joined_bams = ' '.join(bam_in)
     safe_make_dir('variants')
     arguments = [
         'samtools mpileup',
         '-t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR',
         '-go {mpileup_out_bcf}',
         '-f {reference}',
         '{bams}',
     ]
     command = ' '.join(arguments).format(
         mpileup_out_bcf=mpileup_out_bcf,
         reference=self.reference,
         bams=joined_bams)
     run_stage(self.state, 'apply_samtools_mpileup', command)
Example #30
0
    def alignment_stats_bamtools(self, bam_in, outputs):
        '''Run ``bamtools stats`` on a BAM and redirect the report to a file.

        bam_in -- BAM passed via ``-in``.
        outputs -- file receiving the redirected output.
        '''
        template = "bamtools stats -in {bam} > {out}"
        fields = {'bam': bam_in, 'out': outputs}
        run_stage(self.state, 'alignment_stats_bamtools',
                  template.format(**fields))
	
	def snpEff_annotate(self, vcf_in, outputs):
	    '''Get annotation results using snpEff'''
		command = "SnpEff -lof {vcf_in} > {out}".format(vcf_in=vcf_in,out=outputs)
		run_stage(self.state, 'snpEff_annotation_tools', command)
Example #31
0
 def apply_homopolymer_ann(self, inputs, vcf_out):
     '''Add the HRUN (homopolymer run) INFO annotation to a VCF.

     inputs -- input VCF path.
     vcf_out -- file receiving the annotated VCF (redirected stdout).

     An INFO header line is written to ``header.tmp`` in the working
     directory, then ``bcftools annotate`` copies HRUN from ``self.hrfile``
     (columns CHROM,FROM,TO,HRUN).
     '''
     # The header line is single-quoted for the shell. With nested double
     # quotes the shell consumed the inner pair and the file contained
     # Description=HRun instead of Description="HRun".
     header_step = ("echo '##INFO=<ID=HRUN,Number=1,Type=String,"
                    "Description=\"HRun\">' > header.tmp; ")
     # Formatted separately so the header text never passes through
     # str.format.
     annotate_step = ("bcftools annotate -a {hrfile} -c CHROM,FROM,TO,HRUN "
                      "-h header.tmp "
                      "{vcf_in} > {vcf_out}").format(hrfile=self.hrfile,
                                                     vcf_in=inputs,
                                                     vcf_out=vcf_out)
     # NOTE(review): the stage name is 'apply_cat_vcf', not this method's
     # name -- confirm this is intentional before changing it.
     run_stage(self.state, 'apply_cat_vcf', header_step + annotate_step)
Example #32
0
 def apply_vt(self, inputs, vcf_out):
     '''Pipe a VCF through ``vt decompose -s`` and ``vt normalize``.

     inputs -- input VCF path.
     vcf_out -- path passed to ``vt normalize`` via ``-o``.

     Both steps use the executable at ``self.vt_path``; the reference is
     ``self.reference``.
     '''
     # The 'cores' option is looked up but does not appear in the command.
     cores = self.get_stage_options('apply_vt', 'cores')
     template = ("{vt_path} decompose -s {vcf_in} - | {vt_path2} normalize -r {reference} "
                 "-o {vcf_out} - ")
     fields = {
         'vt_path': self.vt_path,
         'vcf_in': inputs,
         'vt_path2': self.vt_path,
         'reference': self.reference,
         'vcf_out': vcf_out,
     }
     run_stage(self.state, 'apply_vt', template.format(**fields))
Example #33
0
 def read_samples(self, input_pth, outputs):
     '''Touch one file under metrics/pass_samples per line of a list file.

     input_pth -- text file whose newline-separated entries are the names
         to touch.
     outputs -- not referenced when the command is built.
     '''
     with open(input_pth, 'r') as handle:
         entries = handle.read().split('\n')
     targets = ["metrics/pass_samples/{}".format(entry) for entry in entries]
     touch_command = 'touch {}'.format(' '.join(targets))
     run_stage(self.state, 'read_samples', touch_command)
Example #34
0
 def apply_vcfanno(self, inputs, vcf_out):
     '''Run the local ``./vcfanno_linux64`` binary on a VCF.

     inputs -- input VCF path.
     vcf_out -- file receiving the redirected output.

     The Lua file (``-lua``) and the annotation config come from
     ``self.annolua`` and ``self.anno``.
     '''
     template = "./vcfanno_linux64 -lua {annolua} {anno} {vcf_in} > {vcf_out}"
     fields = {
         'annolua': self.annolua,
         'anno': self.anno,
         'vcf_in': inputs,
         'vcf_out': vcf_out,
     }
     run_stage(self.state, 'apply_vcfanno', template.format(**fields))
Example #35
0
 def translocations_delly(self, bams_in, vcf_out):
     '''Call translocations (``-t TRA``) with delly.

     bams_in -- iterable of BAM paths appended to the command.
     vcf_out -- path passed via ``-o``.

     Thread count and exclusion file are the 'cores' and 'exclude' options
     of the 'structural_variants_delly' stage, which is also the stage name
     given to ``run_stage``.
     '''
     stage = 'structural_variants_delly'
     joined_bams = ' '.join(bams_in)
     config = self.state.config
     n_threads = config.get_stage_option(stage, 'cores')
     exclusions = config.get_stage_option(stage, 'exclude')
     template = 'OMP_NUM_THREADS={threads} delly -t TRA -x {exclude} -o {vcf_out} -g {reference} {bams}'
     command = template.format(threads=n_threads,
                               exclude=exclusions,
                               vcf_out=vcf_out,
                               reference=self.reference,
                               bams=joined_bams)
     run_stage(self.state, stage, command)
Example #36
0
 def apply_snpeff(self, inputs, vcf_out):
     '''Annotate a VCF with SnpEff (GRCh37.75, canonical transcripts).

     inputs -- input VCF path.
     vcf_out -- file receiving the bgzip-compressed output.

     The Java heap is the stage's 'mem' option minus 2 (GB), but never less
     than 1.
     '''
     # Keep 2 GB of the stage allocation outside the Java heap. Clamp to
     # 1 GB so that a small 'mem' setting cannot yield "-Xmx0g" or a
     # negative heap size.
     heap_gb = max(int(self.get_stage_options('apply_snpeff', 'mem')) - 2, 1)
     snpeff_command = (
         "java -Xmx{mem}g -jar {snpeff_path} eff -c {snpeff_conf} "
         "-canon GRCh37.75 {vcf_in} | bgzip -c > {vcf_out}"
     ).format(mem=heap_gb,
              snpeff_path=self.snpeff_path,
              snpeff_conf=self.snpeff_conf,
              vcf_in=inputs,
              vcf_out=vcf_out)
     run_stage(self.state, 'apply_snpeff', snpeff_command)
Example #37
0
 def sort_bam_sambamba(self, bam_in, sorted_bam_out):
     '''Sort a BAM file with ``sambamba sort``.

     bam_in -- BAM to sort.
     sorted_bam_out -- path passed via ``--out``.

     Threads come from the stage's 'cores' option, the temp directory from
     the global 'tmp' option, and the memory limit is the stage's 'mem'
     option minus 4 (GB), with a floor of 1.
     '''
     stage = 'sort_bam_sambamba'
     config = self.state.config
     n_threads = config.get_stage_option(stage, 'cores')
     tmp_dir = config.get_option('tmp')
     requested_gb = int(config.get_stage_option(stage, 'mem'))
     # Stay 4 GB below the requested memory, but never go under 1 GB.
     limit_gb = max(requested_gb - 4, 1)
     template = 'sambamba sort --nthreads={cores} --memory-limit={mem}GB --tmpdir={tmp} --out={output_bam} {input_bam}'
     fields = {
         'cores': n_threads,
         'mem': limit_gb,
         'tmp': tmp_dir,
         'input_bam': bam_in,
         'output_bam': sorted_bam_out,
     }
     run_stage(self.state, stage, template.format(**fields))
Example #38
0
 def structural_variants_pindel(self, inputs, output):
     '''Run pindel on all chromosomes (``-c ALL``).

     inputs -- ``(bam_in, [config_in, reference_in])``; only the config and
         reference paths appear in the command.
     output -- path passed via ``-o``.

     The thread count is the stage's 'cores' option.
     '''
     bam_in, (pindel_config, reference_fasta) = inputs
     stage = 'structural_variants_pindel'
     n_threads = self.state.config.get_stage_option(stage, 'cores')
     template = 'pindel -T {threads} -f {reference} -i {config} -c ALL -o {output}'
     fields = {
         'threads': n_threads,
         'reference': reference_fasta,
         'config': pindel_config,
         'output': output,
     }
     run_stage(self.state, stage, template.format(**fields))
Example #39
0
    def run_connor(self, bam_in, bam_out):
        '''Run ``connor --force`` on a BAM file.

        bam_in -- input BAM.
        bam_out -- output BAM.

        The ``-f``, ``-s`` and ``-d`` values are read from
        ``self.CONSENSUS_FREQ_THRESHOLD``, ``self.MIN_FAMILY_SIZE_THRESHOLD``
        and ``self.UMT_DISTANCE_THRESHOLD``.
        '''
        arguments = [
            'connor --force -f {CONSENSUS_FREQ_THRESHOLD}',
            '-s {MIN_FAMILY_SIZE_THRESHOLD}',
            '-d {UMT_DISTANCE_THRESHOLD}',
            '{bam_in} {bam_out}',
        ]
        fields = {
            'CONSENSUS_FREQ_THRESHOLD': self.CONSENSUS_FREQ_THRESHOLD,
            'MIN_FAMILY_SIZE_THRESHOLD': self.MIN_FAMILY_SIZE_THRESHOLD,
            'UMT_DISTANCE_THRESHOLD': self.UMT_DISTANCE_THRESHOLD,
            'bam_in': bam_in,
            'bam_out': bam_out,
        }
        run_stage(self.state, 'run_connor',
                  ' '.join(arguments).format(**fields))
Example #40
0
 def align_bwa(self, inputs, bam_out, sample_id):
     '''Align the paired end fastq files to the reference genome using bwa.

     inputs -- pair ``(read1_fastq, read2_fastq)``.
     bam_out -- BAM written by ``samtools view -b -h -o``.
     sample_id -- value used for both ID and SM in the read group.

     The thread count is the stage's 'cores' option and the reference is
     ``self.reference``.
     '''
     fastq_read1_in, fastq_read2_in = inputs
     cores = self.get_stage_options('align_bwa', 'cores')
     # The separators must reach bwa as the two characters backslash + 't';
     # bwa converts them to tabs itself. An unescaped '\t' here puts literal
     # tab characters into the -R argument.
     read_group = '"@RG\\tID:{sample}\\tSM:{sample}\\tPL:Illumina"'.format(
         sample=sample_id)
     command = 'bwa mem -t {cores} -R {read_group} {reference} {fastq_read1} {fastq_read2} ' \
               '| samtools view -b -h -o {bam} -' \
               .format(cores=cores,
                       read_group=read_group,
                       fastq_read1=fastq_read1_in,
                       fastq_read2=fastq_read2_in,
                       reference=self.reference,
                       bam=bam_out)
     run_stage(self.state, 'align_bwa', command)
Example #41
0
    def structural_variants_socrates(self, bam_in, variants_out, sample_dir):
        '''Call structural variants with Socrates'''
        threads = self.state.config.get_stage_option('structural_variants_socrates', 'cores') 
        # jvm_mem is in gb
        jvm_mem = self.state.config.get_stage_option('structural_variants_socrates', 'jvm_mem') 
        bowtie2_ref_dir = self.state.config.get_stage_option('structural_variants_socrates', 'bowtie2_ref_dir') 
        output_dir = os.path.join(sample_dir, 'socrates')
        safe_make_dir(output_dir)
        command = \
        '''
cd {output_dir}
export _JAVA_OPTIONS='-Djava.io.tmpdir={output_dir}'
Socrates all -t {threads} --bowtie2_threads {threads} --bowtie2_db {bowtie2_ref_dir} --jvm_memory {jvm_mem}g {bam}
        '''.format(output_dir=output_dir, threads=threads, bowtie2_ref_dir=bowtie2_ref_dir, jvm_mem=jvm_mem, bam=bam_in)
        run_stage(self.state, 'structural_variants_socrates', command)
Example #42
0
 def align_bwa(self, inputs, bam_out, read_id, lib, lane, sample_id):
     '''Align the paired end fastq files to the reference genome using bwa.

     inputs is a (read 1 fastq, read 2 fastq) pair. The bwa mem output is
     piped through samtools view and written to bam_out in BAM format.
     read_id, lib, lane and sample_id are written into the read group
     header passed to bwa with -R.
     '''
     fastq_read1_in, fastq_read2_in = inputs
     cores = self.get_stage_options('align_bwa', 'cores')
     safe_make_dir('alignments/{sample}'.format(sample=sample_id))
     # Use the caller-supplied library value instead of a fixed "lib1".
     # NOTE(review): the PU and LN tag names are kept as found; confirm
     # against the SAM specification whether LB was intended for the library.
     read_group = '"@RG\\tID:{readid}\\tSM:{sample}\\tPU:{lib}\\tLN:{lane}\\tPL:Illumina"' \
         .format(readid=read_id, lib=lib, lane=lane, sample=sample_id)
     command = 'bwa mem -t {cores} -R {read_group} {reference} {fastq_read1} {fastq_read2} ' \
               '| samtools view -b -h -o {bam} -' \
               .format(cores=cores,
                       read_group=read_group,
                       fastq_read1=fastq_read1_in,
                       fastq_read2=fastq_read2_in,
                       reference=self.reference,
                       bam=bam_out)
     run_stage(self.state, 'align_bwa', command)
Example #43
0
 def align_bwa(self, inputs, bam_out, sample, id):
     """Run bwa mem on a pair of fastq files and store the alignments as BAM.

     inputs is a (read 1 fastq, read 2 fastq) pair. id and sample become the
     ID and SM fields of the read group. The per-sample directory under
     results/alignments is prepared with safe_make_dir before the stage runs.
     """
     read1, read2 = inputs
     threads = self.get_stage_options("align_bwa", "cores")
     rg_header = '"@RG\\tID:{id}\\tSM:{sample}\\tPL:Illumina"'.format(id=id, sample=sample)
     # First half of the pipeline: the aligner itself.
     bwa_part = "bwa mem -t {cores} -R {read_group} {reference} {fastq_read1} {fastq_read2} ".format(
         cores=threads,
         read_group=rg_header,
         reference=self.reference,
         fastq_read1=read1,
         fastq_read2=read2,
     )
     # Second half: convert the aligner output on stdin to BAM.
     samtools_part = "| samtools view -b -h -o {bam} -".format(bam=bam_out)
     command = bwa_part + samtools_part
     safe_make_dir("results/alignments/{sample}".format(sample=sample))
     run_stage(self.state, "align_bwa", command)
Example #44
0
 def align_bwa(self, inputs, bam_out, sample):
     '''Run bwa mem on a pair of fastq files and write the result as BAM.

     inputs is a (read 1 fastq, read 2 fastq) pair. The read group for
     sample and the core count both come from the pipeline configuration.
     '''
     read1, read2 = inputs
     config = self.state.config
     # Read group header for this sample, as stored in the configuration
     rg_header = config.get_read_group(sample)
     # The requested core count doubles as bwa's -t thread count
     threads = config.get_stage_option('align_bwa', 'cores')
     # bwa writes to stdout; samtools view turns that stream into a BAM file
     template = ('bwa mem -t {cores} -R "{read_group}" {reference} {fastq_read1} {fastq_read2} '
                 '| samtools view -S -b - > {bam}')
     fields = dict(cores=threads,
                   read_group=rg_header,
                   reference=self.reference,
                   fastq_read1=read1,
                   fastq_read2=read2,
                   bam=bam_out)
     run_stage(self.state, 'align_bwa', template.format(**fields))
Example #45
0
 def peak_picker_hires(self, mzml_in, mzml_out):
     '''Executes the peak picking with high_res algorithm.

     Runs PeakPickerHiRes on mzml_in and writes the result to mzml_out.
     '''
     # The thread count comes from this stage's own configuration entry.
     cores = self.state.config.get_stage_option('peak_picker_hires', 'cores')
     command = "PeakPickerHiRes -threads {cores} -in {mzml_in} -out {mzml_out}".format(cores=cores, mzml_in=mzml_in, mzml_out=mzml_out)
     run_stage(self.state, 'peak_picker_hires', command)
Example #46
0
 def baseline_filter(self, mzml_in, mzml_out):
     '''Run BaselineFilter (top-hat filter) to remove the baseline of an MS experiment.'''
     stage_name = 'baseline_filter'
     threads = self.state.config.get_stage_option(stage_name, 'cores')
     template = "BaselineFilter -threads {cores} -in {mzml_in} -out {mzml_out}"
     command = template.format(
         cores=threads,
         mzml_in=mzml_in,
         mzml_out=mzml_out)
     run_stage(self.state, stage_name, command)
Example #47
0
 def noise_filter_sgolay(self, mzml_in, mzml_out):
     '''Run NoiseFilterSGolay (Savitzky Golay noise filter) on an mzML file.'''
     stage_name = 'noise_filter_sgolay'
     threads = self.state.config.get_stage_option(stage_name, 'cores')
     template = "NoiseFilterSGolay -threads {cores} -in {mzml_in} -out {mzml_out}"
     fields = {'cores': threads, 'mzml_in': mzml_in, 'mzml_out': mzml_out}
     run_stage(self.state, stage_name, template.format(**fields))
Example #48
0
 def resample(self, mzml_in, mzml_out):
     '''Run Resampler to bring an MZML file to the configured sampling rate.'''
     config = self.state.config
     # Both the thread count and the target rate are per-stage settings
     threads = config.get_stage_option('resample', 'cores')
     sampling_rate = config.get_stage_option('resample', 'rate')
     template = "Resampler -sampling_rate {rate} -threads {cores} -in {mzml_in} -out {mzml_out}"
     command = template.format(
         rate=sampling_rate,
         cores=threads,
         mzml_in=mzml_in,
         mzml_out=mzml_out)
     run_stage(self.state, 'resample', command)
Example #49
0
 def extract_discordant_alignments(self, bam_in, discordants_bam_out):
     '''Write the discordant paired-end alignments of a BAM file to a new BAM file.'''
     # 1294 = 2 + 4 + 8 + 256 + 1024; -F drops any read with one of
     # these flag bits set.
     template = 'samtools view -b -F 1294 {input_bam} > {output_bam}'
     command = template.format(output_bam=discordants_bam_out, input_bam=bam_in)
     run_stage(self.state, 'extract_discordant_alignments', command)
Example #50
0
 def extract_chromosomes_samtools(self, bam_in, bam_out):
     '''Extract selected chromosomes (chr2, chr3 and chr7) from a bam file'''
     # The region list is fixed in the command text.
     template = 'samtools view -h -b {bam_in} chr2 chr3 chr7 > {bam_out}'
     command = template.format(bam_out=bam_out, bam_in=bam_in)
     run_stage(self.state, 'extract_chromosomes_samtools', command)
Example #51
0
 def bamtools_stats(self, bam_in, stats_out):
     '''Run bamtools stats on a BAM file and redirect the report to stats_out'''
     template = 'bamtools stats -in {bam} > {stats}'
     run_stage(self.state, 'bamtools_stats',
               template.format(stats=stats_out, bam=bam_in))
Example #52
0
 def fastqc(self, fastq_in, dir_out):
     '''Run fastqc on a fastq file, placing its output in dir_out'''
     # Prepare the output directory before fastqc is launched
     safe_make_dir(dir_out)
     template = "fastqc --quiet -o {dir} {fastq}"
     run_stage(self.state, 'fastqc',
               template.format(fastq=fastq_in, dir=dir_out))
Example #53
0
 def index_reference_samtools(self, reference_in, index_file_out):
     '''Run samtools faidx on the reference genome.

     index_file_out does not appear in the command.
     '''
     template = 'samtools faidx {ref}'
     run_stage(self.state, 'index_reference_samtools',
               template.format(ref=reference_in))
Example #54
0
 def sort_bam(self, bam_in, sorted_bam_out, sorted_bam_prefix):
     '''Sort the reads of a bam file with samtools sort.

     The output is named by sorted_bam_prefix; sorted_bam_out does not
     appear in the command.
     '''
     # NOTE(review): "samtools sort <in.bam> <out.prefix>" looks like the
     # legacy calling convention - confirm it matches the installed samtools.
     template = 'samtools sort {input_bam} {output_bam_prefix}'
     command = template.format(output_bam_prefix=sorted_bam_prefix,
                               input_bam=bam_in)
     run_stage(self.state, 'sort_bam', command)
Example #55
0
 def reference_dictionary_picard(self, reference_in, dict_file_out):
     '''Build a sequence dictionary for the reference FASTA with picard'''
     # $PICARD_HOME is left for the shell to expand when the command runs
     template = ('java -jar $PICARD_HOME/lib/CreateSequenceDictionary.jar '
                 'R={ref} O={dict_file}')
     command = template.format(dict_file=dict_file_out, ref=reference_in)
     run_stage(self.state, 'reference_dictionary_picard', command)
Example #56
0
 def feature_finder_centroid(self, mzml_in, feature_xml_out):
     '''Run FeatureFinderCentroided, the feature detection step for quantitation (centroided).'''
     stage_name = 'feature_finder_centroid'
     threads = self.state.config.get_stage_option(stage_name, 'cores')
     template = "FeatureFinderCentroided -threads {cores} -in {mzml_in} -out {feature_out}"
     command = template.format(
         cores=threads,
         mzml_in=mzml_in,
         feature_out=feature_xml_out)
     run_stage(self.state, stage_name, command)
Example #57
0
 def extract_genes_bedtools(self, bam_in, bam_out):
     '''Intersect the sorted BAM file with the configured BED file (MMR genes)'''
     stage_name = 'extract_genes_bedtools'
     # The BED file of target regions is a per-stage configuration option
     regions_bed = self.state.config.get_stage_option(stage_name, 'bed')
     template = 'bedtools intersect -abam {bam_in} -b {bed_file} > {bam_out}'
     command = template.format(
         bam_in=bam_in,
         bed_file=regions_bed,
         bam_out=bam_out)
     run_stage(self.state, stage_name, command)
Example #58
0
 def index_reference_bowtie2(self, reference_in, index_file_out, output_prefix):
     '''Run bowtie2-build on the reference genome.

     The index is named by output_prefix; index_file_out does not appear
     in the command.
     '''
     template = 'bowtie2-build {ref} {output_prefix}'
     command = template.format(output_prefix=output_prefix, ref=reference_in)
     run_stage(self.state, 'index_reference_bowtie2', command)
Example #59
0
def run_java(state, stage, jar_path, mem, args):
    """Build a command with java_command and run it as the given stage."""
    run_stage(state, stage, java_command(jar_path, mem, args))
Example #60
0
 def index_bam(self, bam_in, index_out):
     '''Run samtools index on a bam file.

     index_out does not appear in the command.
     '''
     template = 'samtools index {bam}'
     run_stage(self.state, 'index_bam', template.format(bam=bam_in))