def combine_variants(in_vcfs, out_vcf, in_reference_fasta=s['ref']['reference_fasta'],
                     genotype_merge_option='REQUIRE_UNIQUE', mem_req=6 * 1024):
    """
    Build the GATK ``CombineVariants`` command line that merges several VCFs.

    :param in_vcfs: the VCF inputs to merge.  When ``genotype_merge_option`` is
        ``PRIORITIZE`` each input is paired with its position in this sequence
        and that position is used as its priority label.
    :param out_vcf: value substituted after ``-o`` in the command.
    :param in_reference_fasta: value substituted after ``-R`` in the command.
    :param genotype_merge_option: select from the following:
        UNIQUIFY - Make all sample genotypes unique by file. Each sample shared
            across RODs gets named sample.ROD.
        PRIORITIZE - Take genotypes in priority order (see the priority argument).
        UNSORTED - Take the genotypes in any order.
        REQUIRE_UNIQUE - Require that all samples/genotypes be unique.
    :param mem_req: forwarded to ``gatk()`` to build the launcher prefix
        (presumably a memory amount in MB -- TODO confirm against ``gatk``).
    :return: the formatted shell command as a string.
    """
    template = r"""
        {gatk} \
        -T CombineVariants \
        -R {in_reference_fasta} \
        -o {out_vcf} \
        {inputs} \
        {args}
    """

    if genotype_merge_option == 'PRIORITIZE':
        # Tag every input with its index; the same indices, in order, form the
        # comma-separated value handed to -priority.
        indexed_vcfs = list(enumerate(in_vcfs))
        inputs = vcf_list_to_input(indexed_vcfs)
        priority = ','.join(str(rank) for rank in range(len(in_vcfs)))
    else:
        inputs = vcf_list_to_input(in_vcfs)
        priority = None

    launcher = gatk(mem_req)
    optional_flags = args(('-priority', priority),
                          ('--genotypemergeoption', genotype_merge_option))

    return template.format(gatk=launcher,
                           in_reference_fasta=in_reference_fasta,
                           out_vcf=out_vcf,
                           inputs=inputs,
                           args=optional_flags)
def freebayes(reference_fasta=settings['ref']['reference_fasta'], max_complex_gap=2, no_complex=True,
              in_target_bed=find('bed$'), in_bam=find('bam$'), out_vcf=out_dir('variants.vcf')):
    """
    Build the freebayes command line for calling variants from a BAM.

    :param reference_fasta: value substituted after ``-f``.
    :param max_complex_gap: value paired with ``--max-complex-gap`` when the
        optional flags are rendered by ``args()``.
    :param no_complex: value paired with ``--no-complex`` when the optional
        flags are rendered by ``args()``.
    :param in_target_bed: value substituted after ``--targets``.
    :param in_bam: the final positional argument of the command.
    :param out_vcf: value substituted after ``--vcf``.
    :return: the formatted shell command as a string.
    """
    # The -m/-q/-R/-S/-F thresholds are fixed in the template rather than
    # exposed as parameters.
    template = r"""
        {s[opt][freebayes]} -f {reference_fasta} \
        --vcf {out_vcf} \
        --targets {in_target_bed} \
        {args} \
        -m 30 -q 10 -R 0 -S 0 -F 0.1 \
        {in_bam}
    """

    optional_flags = args(('--max-complex-gap', max_complex_gap),
                          ('--no-complex', no_complex))

    # The executable path is looked up inside the template via s[opt][freebayes].
    return template.format(s=settings,
                           args=optional_flags,
                           reference_fasta=reference_fasta,
                           out_vcf=out_vcf,
                           in_target_bed=in_target_bed,
                           in_bam=in_bam)
def cut_adapt(minimum_length=50,
              in_fastq1=find(r'fq.gz|\.fastq|fastq.gz', tags=dict(read_pair='1')),
              in_fastq2=find(r'fq.gz|\.fastq|fastq.gz', tags=dict(read_pair='2')),
              out_fastq1=out_dir('trimmed_r1.fastq.gz'),
              out_fastq2=out_dir('trimmed_r2.fastq.gz')):
    """
    Build the cutadapt command line that trims a pair of FASTQ files.

    The regex defaults are raw string literals so that ``\\.`` reaches
    ``find()`` unchanged without triggering Python's invalid-escape-sequence
    warning; the pattern text itself is the same as before.

    :param minimum_length: value paired with ``--minimum-length`` when the
        optional flags are rendered by ``args()``.
    :param in_fastq1: first positional input (selected with ``read_pair='1'``).
    :param in_fastq2: second positional input (selected with ``read_pair='2'``).
    :param out_fastq1: value substituted after ``-o``.
    :param out_fastq2: value substituted after ``-p``.
    :return: the formatted shell command as a string.
    """
    # NOTE: an earlier variant wrote the outputs through gzip via process
    # substitution; kept here for reference only:
    # out_fastq1='>( gzip > %s)' % out_fastq1
    # out_fastq2='>( gzip > %s)' % out_fastq2

    # The adapter sequences given to -a and -A are hard-coded in the template.
    template = r"""
        {s[opt][cutadapt]} \
        -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
        -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
        {args} \
        -o {out_fastq1} -p {out_fastq2} \
        {in_fastq1} {in_fastq2}
    """

    optional_flags = args(('--minimum-length', minimum_length))

    # Explicit keywords instead of **locals(): only the names the template
    # actually references are passed in.
    return template.format(s=s,
                           args=optional_flags,
                           out_fastq1=out_fastq1,
                           out_fastq2=out_fastq2,
                           in_fastq1=in_fastq1,
                           in_fastq2=in_fastq2)