Beispiel #1
0
def combine_variants(in_vcfs,
                     out_vcf,
                     in_reference_fasta=s['ref']['reference_fasta'],
                     genotype_merge_option='REQUIRE_UNIQUE',
                     mem_req=6 * 1024):
    """
    Return the GATK ``CombineVariants`` command line that merges `in_vcfs` into `out_vcf`.

    :param in_vcfs: the input VCFs; handed to ``vcf_list_to_input`` to produce the input
        arguments. Must support ``len()`` when `genotype_merge_option` is ``PRIORITIZE``.
    :param out_vcf: value substituted after ``-o``.
    :param in_reference_fasta: value substituted after ``-R``.
    :param genotype_merge_option: select from the following:
        UNIQUIFY - Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
        PRIORITIZE - Take genotypes in priority order (see the priority argument).
        UNSORTED - Take the genotypes in any order.
        REQUIRE_UNIQUE - Require that all samples/genotypes be unique.
    :param mem_req: forwarded to ``gatk()`` (presumably a memory size in MB -- confirm
        against the ``gatk`` helper).
    :return: the formatted, backslash-continued shell command string.
    """
    if genotype_merge_option == 'PRIORITIZE':
        # Each VCF is paired with its position in the list, and the same positions are
        # listed, in order, as the priority string.
        inputs = vcf_list_to_input(list(enumerate(in_vcfs)))
        priority = ','.join(str(rank) for rank in range(len(in_vcfs)))
    else:
        inputs = vcf_list_to_input(in_vcfs)
        priority = None

    template = r"""
        {gatk} \
        -T CombineVariants \
        -R {in_reference_fasta} \
        -o {out_vcf} \
        {inputs} \
        {args}
    """
    return template.format(gatk=gatk(mem_req),
                           args=args(('-priority', priority),
                                     ('--genotypemergeoption', genotype_merge_option)),
                           in_reference_fasta=in_reference_fasta,
                           out_vcf=out_vcf,
                           inputs=inputs)
Beispiel #2
0
def combine_variants(in_vcfs,
                     out_vcf,
                     in_reference_fasta=s['ref']['reference_fasta'],
                     genotype_merge_option='REQUIRE_UNIQUE',
                     mem_req=6 * 1024):
    """
    Build the shell command that runs GATK ``CombineVariants`` over `in_vcfs`.

    :param in_vcfs: input VCFs, converted to command-line inputs by ``vcf_list_to_input``.
        ``len()`` is taken of it when `genotype_merge_option` is ``PRIORITIZE``.
    :param out_vcf: output path placed after ``-o``.
    :param in_reference_fasta: reference placed after ``-R``.
    :param genotype_merge_option: select from the following:
        UNIQUIFY - Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
        PRIORITIZE - Take genotypes in priority order (see the priority argument).
        UNSORTED - Take the genotypes in any order.
        REQUIRE_UNIQUE - Require that all samples/genotypes be unique.
    :param mem_req: passed straight to ``gatk()``; units are defined by that helper
        (TODO confirm).
    :return: the command as a single string with backslash line continuations.
    """
    if genotype_merge_option != 'PRIORITIZE':
        inputs = vcf_list_to_input(in_vcfs)
        priority = None
    else:
        # Inputs are (index, vcf) pairs; the priority string lists those same indices.
        inputs = vcf_list_to_input(list(enumerate(in_vcfs)))
        priority = ','.join([str(index) for index in range(len(in_vcfs))])

    fields = dict(
        gatk=gatk(mem_req),
        args=args(('-priority', priority),
                  ('--genotypemergeoption', genotype_merge_option)),
        in_reference_fasta=in_reference_fasta,
        out_vcf=out_vcf,
        inputs=inputs,
    )
    return r"""
        {gatk} \
        -T CombineVariants \
        -R {in_reference_fasta} \
        -o {out_vcf} \
        {inputs} \
        {args}
    """.format(**fields)
Beispiel #3
0
def freebayes(reference_fasta=settings['ref']['reference_fasta'],
              max_complex_gap=2,
              no_complex=True,
              in_target_bed=find('bed$'),
              in_bam=find('bam$'),
              out_vcf=out_dir('variants.vcf')):
    """
    Return the freebayes command line that writes variants for `in_bam` to `out_vcf`.

    The executable path is read from ``settings['opt']['freebayes']``. The options
    ``-m 30 -q 10 -R 0 -S 0 -F 0.1`` are hard-coded in the template.

    :param reference_fasta: value substituted after ``-f``.
    :param max_complex_gap: passed to ``args()`` as ``--max-complex-gap``.
    :param no_complex: passed to ``args()`` as ``--no-complex``.
    :param in_target_bed: value substituted after ``--targets``.
    :param in_bam: input placed last on the command line.
    :param out_vcf: value substituted after ``--vcf``.
    :return: the formatted, backslash-continued shell command string.
    """
    optional_flags = args(('--max-complex-gap', max_complex_gap),
                          ('--no-complex', no_complex))
    template = r"""
        {s[opt][freebayes]} -f {reference_fasta} \
        --vcf {out_vcf} \
        --targets {in_target_bed} \
        {args} \
        -m 30 -q 10 -R 0 -S 0 -F 0.1 \
        {in_bam}
    """
    return template.format(s=settings,
                           args=optional_flags,
                           reference_fasta=reference_fasta,
                           out_vcf=out_vcf,
                           in_target_bed=in_target_bed,
                           in_bam=in_bam)
Beispiel #4
0
def freebayes(reference_fasta=settings['ref']['reference_fasta'],
              max_complex_gap=2,
              no_complex=True,
              in_target_bed=find('bed$'),
              in_bam=find('bam$'),
              out_vcf=out_dir('variants.vcf')):
    """
    Build the shell command that runs freebayes on `in_bam`.

    The binary comes from ``settings['opt']['freebayes']``; the trailing options
    ``-m 30 -q 10 -R 0 -S 0 -F 0.1`` are fixed in the template and not configurable here.

    :param reference_fasta: reference placed after ``-f``.
    :param max_complex_gap: rendered by ``args()`` under ``--max-complex-gap``.
    :param no_complex: rendered by ``args()`` under ``--no-complex``.
    :param in_target_bed: targets file placed after ``--targets``.
    :param in_bam: input file, the final token of the command.
    :param out_vcf: output path placed after ``--vcf``.
    :return: the command as a single string with backslash line continuations.
    """
    fields = dict(
        s=settings,
        args=args(('--max-complex-gap', max_complex_gap),
                  ('--no-complex', no_complex)),
        reference_fasta=reference_fasta,
        out_vcf=out_vcf,
        in_target_bed=in_target_bed,
        in_bam=in_bam,
    )
    return r"""
        {s[opt][freebayes]} -f {reference_fasta} \
        --vcf {out_vcf} \
        --targets {in_target_bed} \
        {args} \
        -m 30 -q 10 -R 0 -S 0 -F 0.1 \
        {in_bam}
    """.format(**fields)
Beispiel #5
0
def cut_adapt(minimum_length=50,
              in_fastq1=find(r'fq.gz|\.fastq|fastq.gz', tags=dict(read_pair='1')),
              in_fastq2=find(r'fq.gz|\.fastq|fastq.gz', tags=dict(read_pair='2')),
              out_fastq1=out_dir('trimmed_r1.fastq.gz'),
              out_fastq2=out_dir('trimmed_r2.fastq.gz')):
    """
    Return the cutadapt command line that trims a pair of FASTQ files.

    The executable path is read from ``s['opt']['cutadapt']``. The adapter sequences
    given to ``-a`` and ``-A`` are hard-coded in the template.

    The ``find()`` patterns are written as raw strings so the regex escape in them is
    passed through verbatim instead of relying on Python's handling of an invalid
    string escape; the pattern text itself is unchanged.

    :param minimum_length: passed to ``args()`` as ``--minimum-length``.
    :param in_fastq1: first input FASTQ (``read_pair='1'``).
    :param in_fastq2: second input FASTQ (``read_pair='2'``).
    :param out_fastq1: output path placed after ``-o``.
    :param out_fastq2: output path placed after ``-p``.
    :return: the formatted, backslash-continued shell command string.
    """
    template = r"""
        {s[opt][cutadapt]} \
        -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
        -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
        {args} \
        -o {out_fastq1} -p {out_fastq2} \
        {in_fastq1} {in_fastq2}
    """
    return template.format(s=s,
                           args=args(('--minimum-length', minimum_length)),
                           out_fastq1=out_fastq1,
                           out_fastq2=out_fastq2,
                           in_fastq1=in_fastq1,
                           in_fastq2=in_fastq2)
Beispiel #6
0
def cut_adapt(minimum_length=50,
              in_fastq1=find(r'fq.gz|\.fastq|fastq.gz',
                             tags=dict(read_pair='1')),
              in_fastq2=find(r'fq.gz|\.fastq|fastq.gz',
                             tags=dict(read_pair='2')),
              out_fastq1=out_dir('trimmed_r1.fastq.gz'),
              out_fastq2=out_dir('trimmed_r2.fastq.gz')):
    """
    Build the shell command that runs cutadapt on a pair of FASTQ files.

    The binary comes from ``s['opt']['cutadapt']``; the ``-a`` / ``-A`` adapter
    sequences are fixed in the template.

    The ``find()`` patterns are raw string literals: the original non-raw literals
    contained an invalid string escape, which newer Python versions warn about.
    The resulting pattern text is identical.

    :param minimum_length: rendered by ``args()`` under ``--minimum-length``.
    :param in_fastq1: first input FASTQ (``read_pair='1'``).
    :param in_fastq2: second input FASTQ (``read_pair='2'``).
    :param out_fastq1: output path given to ``-o``.
    :param out_fastq2: output path given to ``-p``.
    :return: the command as a single string with backslash line continuations.
    """
    fields = dict(
        s=s,
        args=args(('--minimum-length', minimum_length)),
        in_fastq1=in_fastq1,
        in_fastq2=in_fastq2,
        out_fastq1=out_fastq1,
        out_fastq2=out_fastq2,
    )
    return r"""
        {s[opt][cutadapt]} \
        -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
        -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
        {args} \
        -o {out_fastq1} -p {out_fastq2} \
        {in_fastq1} {in_fastq2}
    """.format(**fields)