Esempio n. 1
0
def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    Add one MuTect child job per chromosome for the tumor/normal DNA bams.

    The chromosome list is taken from mutect_options['chromosomes'] when that entry is truthy,
    otherwise it is obtained from sample_chromosomes using mutect_options['genome_fai'].

    :param job: Job to which the per-chromosome child jobs are added
    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mutect_options: Options specific to MuTect
    :return: Dict holding, for every chromosome, the promised return value of its MuTect child
             perchrom_mutect:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: dict
    """
    chromosomes = mutect_options['chromosomes']
    if not chromosomes:
        # No explicit chromosome list was supplied; fall back to the genome index.
        chromosomes = sample_chromosomes(job, mutect_options['genome_fai'])
    results = defaultdict()
    for chrom in chromosomes:
        # Disk is requested through a PromisedRequirement built from mutect_disk and the inputs.
        disk_needed = PromisedRequirement(mutect_disk,
                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                          normal_bam['normal_dna_fix_pg_sorted.bam'],
                                          mutect_options['genome_fasta'],
                                          mutect_options['dbsnp_vcf'],
                                          mutect_options['cosmic_vcf'])
        child = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                  mutect_options, chrom, memory='6G', disk=disk_needed)
        results[chrom] = child.rv()
    return results
Esempio n. 2
0
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    Add the strelka subgraph for the DNA bams under `job`.  When `split` is set, the genome-level
    result is additionally handed to an encapsulated wrap_unmerge job.

    The chromosome list is taken from strelka_options['chromosomes'] when that entry is truthy,
    otherwise it is obtained from sample_chromosomes using strelka_options['genome_fai'].  The
    number of cores requested for strelka is the smaller of the chromosome count and
    univ_options['max_cores'].

    :param job: Job to which the strelka job is added as a child
    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict strelka_options: Options specific to strelka
    :param bool split: Should the results be split into perchrom vcfs?
    :return: Either the fsID to the genome-level vcf or a dict of results from running strelka
             on every chromosome
             perchrom_strelka:
                 |- 'chr1':
                 |      |-'snvs': fsID
                 |      +-'indels': fsID
                 |- 'chr2':
                 |      |-'snvs': fsID
                 |      +-'indels': fsID
                 |-...
                 |
                 +- 'chrM':
                        |-'snvs': fsID
                        +-'indels': fsID
    :rtype: toil.fileStore.FileID|dict
    """
    chromosomes = strelka_options['chromosomes']
    if not chromosomes:
        chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
    core_count = min(len(chromosomes), univ_options['max_cores'])
    disk_needed = PromisedRequirement(strelka_disk,
                                      tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                      normal_bam['normal_dna_fix_pg_sorted.bam'],
                                      strelka_options['genome_fasta'])
    strelka = job.wrapJobFn(run_strelka_full, tumor_bam, normal_bam, univ_options,
                            strelka_options, disk=disk_needed, memory='6G', cores=core_count)
    job.addChild(strelka)
    if not split:
        # Caller wants the genome-level result as is.
        return strelka.rv()
    # The unmerge step runs after strelka and is encapsulated before being attached.
    unmerge_strelka = job.wrapJobFn(wrap_unmerge, strelka.rv(), chromosomes, strelka_options,
                                    univ_options).encapsulate()
    strelka.addChild(unmerge_strelka)
    return unmerge_strelka.rv()
Esempio n. 3
0
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    This module will add a single strelka job (run_strelka_full) on the DNA bams as a child of
    `job` and, when `split` is True, follow it with an encapsulated wrap_unmerge job.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai.  Only this key is read here:
         tumor_bam
              +- 'tumor_dna_fix_pg_sorted.bam': <JSid>
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai.  Only this key is read here:
         normal_bam
              +- 'normal_dna_fix_pg_sorted.bam': <JSid>
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'max_cores': <upper bound on the number of cores requested for strelka>
    4. strelka_options: Dict of parameters specific to strelka.  Keys read here:
         strelka_options
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fasta index file>
    5. split: If True, the strelka result is passed on to wrap_unmerge

    RETURN VALUES
    1. The promised return value of the wrap_unmerge job when split is True, otherwise the
       promised return value of the strelka job itself.

    This module corresponds to node 11 on the tree
    """
    chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
    # Never ask for more cores than there are chromosomes.
    core_count = min(len(chromosomes), univ_options['max_cores'])
    disk_needed = PromisedRequirement(strelka_disk,
                                      tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                      normal_bam['normal_dna_fix_pg_sorted.bam'],
                                      strelka_options['genome_fasta'])
    strelka = job.wrapJobFn(run_strelka_full, tumor_bam, normal_bam, univ_options,
                            strelka_options, disk=disk_needed, memory='6G', cores=core_count)
    job.addChild(strelka)
    if not split:
        return strelka.rv()
    unmerge_strelka = job.wrapJobFn(wrap_unmerge, strelka.rv(), strelka_options,
                                    univ_options).encapsulate()
    strelka.addChild(unmerge_strelka)
    return unmerge_strelka.rv()
Esempio n. 4
0
def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_options, split=True):
    """
    Run the SomaticSniper subgraph on the DNA bams.  Optionally split the results into
    per-chromosome vcfs.

    The jobs are wired as follows: run_somaticsniper_full and run_pileup are both children of
    `job`; filter_somaticsniper is a child of both of them and receives their promised return
    values; when `split` is True an unmerge job is added as a child of the filter job.

    The chromosome list is taken from somaticsniper_options['chromosomes'] when that entry is
    truthy, otherwise it is obtained from sample_chromosomes using
    somaticsniper_options['genome_fai'].

    :param job: Job to which the subgraph is added
    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict somaticsniper_options: Options specific to SomaticSniper
    :param bool split: Should the results be split into perchrom vcfs?
    :return: Either the fsID to the genome-level vcf or a dict of results from running SomaticSniper
             on every chromosome
             perchrom_somaticsniper:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: toil.fileStore.FileID|dict
    """
    # Get a list of chromosomes to handle
    if somaticsniper_options['chromosomes']:
        chromosomes = somaticsniper_options['chromosomes']
    else:
        chromosomes = sample_chromosomes(job, somaticsniper_options['genome_fai'])
    snipe = job.wrapJobFn(run_somaticsniper_full, tumor_bam, normal_bam, univ_options,
                          somaticsniper_options,
                          disk=PromisedRequirement(sniper_disk,
                                                   tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                   normal_bam['normal_dna_fix_pg_sorted.bam'],
                                                   somaticsniper_options['genome_fasta']),
                          memory='6G')
    pileup = job.wrapJobFn(run_pileup, tumor_bam, univ_options, somaticsniper_options,
                           disk=PromisedRequirement(pileup_disk,
                                                    tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                    somaticsniper_options['genome_fasta']),
                           memory='6G')
    # The filter step consumes the promised outputs of both the caller and the pileup.
    filtersnipes = job.wrapJobFn(filter_somaticsniper, tumor_bam, snipe.rv(), pileup.rv(),
                                 univ_options, somaticsniper_options,
                                 disk=PromisedRequirement(sniper_filter_disk,
                                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                          somaticsniper_options['genome_fasta']),
                                 memory='6G')

    job.addChild(snipe)
    job.addChild(pileup)
    # Attaching the filter job to both parents makes it wait for each of them.
    snipe.addChild(filtersnipes)
    pileup.addChild(filtersnipes)
    if split:
        unmerge_snipes = job.wrapJobFn(unmerge, filtersnipes.rv(), 'somaticsniper', chromosomes,
                                       somaticsniper_options, univ_options)
        filtersnipes.addChild(unmerge_snipes)
        return unmerge_snipes.rv()
    else:
        return filtersnipes.rv()
Esempio n. 5
0
def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    This module will add a mutect child job (run_mutect_perchrom) to `job` for each chromosome
    returned by sample_chromosomes.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai.  Only this key is read here:
         tumor_bam
              +- 'tumor_dna_fix_pg_sorted.bam': <JSid>
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai.  Only this key is read here:
         normal_bam
              +- 'normal_dna_fix_pg_sorted.bam': <JSid>
    3. univ_options: Dict of universal arguments used by almost all tools (passed through to the
       child jobs unchanged)
    4. mutect_options: Dict of parameters specific to mutect.  Keys read here:
         mutect_options
              |- 'dbsnp_vcf': <JSid for dbsnp vcf file>
              |- 'cosmic_vcf': <JSid for cosmic vcf file>
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fasta index file>

    RETURN VALUES
    1. perchrom_mutect: Dict with one entry per chromosome; each value is the promised return
       value of that chromosome's run_mutect_perchrom job
         perchrom_mutect
              |- 'chr1': <promise>
              |- 'chr2': <promise>
             etc...

    This module corresponds to node 11 on the tree
    """
    def _schedule(chrom):
        # Add the child job for a single chromosome and hand back its promised result.
        disk_needed = PromisedRequirement(mutect_disk,
                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                          normal_bam['normal_dna_fix_pg_sorted.bam'],
                                          mutect_options['genome_fasta'],
                                          mutect_options['dbsnp_vcf'],
                                          mutect_options['cosmic_vcf'])
        child = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                  mutect_options, chrom, memory='6G', disk=disk_needed)
        return child.rv()

    # Get a list of chromosomes to handle
    chromosomes = sample_chromosomes(job, mutect_options['genome_fai'])
    perchrom_mutect = defaultdict()
    for chrom in chromosomes:
        perchrom_mutect[chrom] = _schedule(chrom)
    return perchrom_mutect
Esempio n. 6
0
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
    """
    This module will add, for each chromosome returned by sample_chromosomes, a radia child job
    (run_radia_perchrom) followed by a filter job (run_filter_radia) on the RNA and DNA bams.

    ARGUMENTS
    1. rna_bam: Dict of input STAR bams
         rna_bam
              |- 'rnaAligned.sortedByCoord.out.bam': REFER run_star()
                                |- 'rna_fix_pg_sorted.bam': <JSid>
                                +- 'rna_fix_pg_sorted.bam.bai': <JSid>
    2. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
         tumor_bam
              |- 'tumor_dna_fix_pg_sorted.bam': <JSid>
              +- 'tumor_dna_fix_pg_sorted.bam.bai': <JSid>
    3. normal_bam: Dict of input normal WGS/WSQ bam + bai
         normal_bam
              |- 'normal_dna_fix_pg_sorted.bam': <JSid>
              +- 'normal_dna_fix_pg_sorted.bam.bai': <JSid>
    4. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'patient': <value interpolated into the log message>
    5. radia_options: Dict of parameters specific to radia.  Only this key is read here:
         radia_options
              +- 'genome_fai': <JSid for genome fai file>

    RETURN VALUES
    1. perchrom_radia: Dict with one entry per chromosome; each value is the promised return
       value of that chromosome's run_filter_radia job
         perchrom_radia
              |- 'chr1': <promise>
              |- 'chr2': <promise>
             etc...

    This module corresponds to node 11 on the tree
    """
    job.fileStore.logToMaster('Running spawn_radia on %s' % univ_options['patient'])
    # The RNA bam and its index sit one level down, under the STAR output entry.
    star_bams = rna_bam['rnaAligned.sortedByCoord.out.bam']
    bams = {'tumor_rna': star_bams['rna_fix_pg_sorted.bam'],
            'tumor_rnai': star_bams['rna_fix_pg_sorted.bam.bai'],
            'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
            'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
            'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
            'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']}
    # Both the calling and the filtering job request the same fixed resources.
    resources = {'disk': '60G', 'memory': '6G'}
    perchrom_radia = defaultdict()
    for chrom in sample_chromosomes(job, radia_options['genome_fai']):
        radia = job.addChildJobFn(run_radia_perchrom, bams, univ_options, radia_options, chrom,
                                  **resources)
        filter_radia = radia.addChildJobFn(run_filter_radia, bams, radia.rv(), univ_options,
                                           radia_options, chrom, **resources)
        perchrom_radia[chrom] = filter_radia.rv()
    return perchrom_radia
Esempio n. 7
0
def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
    """
    This module will add, for each chromosome returned by sample_chromosomes, a muse call job
    (run_muse_perchrom) followed by a sump job (run_muse_sump_perchrom) on the DNA bams.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai.  Only this key is read here:
         tumor_bam
              +- 'tumor_dna_fix_pg_sorted.bam': <JSid>
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai.  Only this key is read here:
         normal_bam
              +- 'normal_dna_fix_pg_sorted.bam': <JSid>
    3. univ_options: Dict of universal arguments used by almost all tools (passed through to the
       child jobs unchanged)
    4. muse_options: Dict of parameters specific to muse.  Keys read here:
         muse_options
              |- 'dbsnp_vcf': <JSid for dbsnp vcf file>
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fasta index file>

    RETURN VALUES
    1. perchrom_muse: Dict with one entry per chromosome; each value is the promised return
       value of that chromosome's run_muse_sump_perchrom job
         perchrom_muse
              |- 'chr1'
              |   +- <JSid for muse_chr1.vcf>
              |- 'chr2'
              |   +- <JSid for muse_chr2.vcf>
             etc...

    This module corresponds to node 11 on the tree
    """
    perchrom_muse = defaultdict()
    for chrom in sample_chromosomes(job, muse_options['genome_fai']):
        # Step 1: the call job, a direct child of `job`.
        call_disk = PromisedRequirement(muse_disk,
                                        tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                        normal_bam['normal_dna_fix_pg_sorted.bam'],
                                        muse_options['genome_fasta'])
        call = job.addChildJobFn(run_muse_perchrom, tumor_bam, normal_bam, univ_options,
                                 muse_options, chrom, disk=call_disk, memory='6G')
        # Step 2: the sump job, a child of the call job that consumes its promised output.
        sump_disk = PromisedRequirement(muse_sump_disk, muse_options['dbsnp_vcf'])
        sump = call.addChildJobFn(run_muse_sump_perchrom, call.rv(), univ_options, muse_options,
                                  chrom, disk=sump_disk, memory='6G')
        perchrom_muse[chrom] = sump.rv()
    return perchrom_muse
Esempio n. 8
0
def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    This module will add a mutect child job (run_mutect_perchrom) to `job` for each chromosome
    returned by sample_chromosomes.  Every child requests a fixed 60G of disk and 6G of memory.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai (passed through to the child jobs)
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai (passed through to the child jobs)
    3. univ_options: Dict of universal arguments used by almost all tools (passed through to the
       child jobs)
    4. mutect_options: Dict of parameters specific to mutect.  Only this key is read here:
         mutect_options
              +- 'genome_fai': <JSid for genome fasta index file>

    RETURN VALUES
    1. perchrom_mutect: Dict with one entry per chromosome; each value is the promised return
       value of that chromosome's run_mutect_perchrom job
         perchrom_mutect
              |- 'chr1': <promise>
              |- 'chr2': <promise>
             etc...

    This module corresponds to node 11 on the tree
    """
    perchrom_mutect = defaultdict()
    # Get a list of chromosomes to handle, then add one child per chromosome
    for chrom in sample_chromosomes(job, mutect_options['genome_fai']):
        child = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                  mutect_options, chrom, disk='60G', memory='6G')
        perchrom_mutect[chrom] = child.rv()
    return perchrom_mutect
Esempio n. 9
0
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options,
              radia_options):
    """
    Spawn a RADIA job for each chromosome on the input bam trios.

    For every chromosome a run_radia_perchrom job is added as a child of `job`, and a
    run_filter_radia job is added as a child of that job.  The chromosome list is taken from
    radia_options['chromosomes'] when that entry is truthy, otherwise it is obtained from
    sample_chromosomes using radia_options['genome_fai'].

    :param job: Job to which the per-chromosome child jobs are added
    :param dict rna_bam: Dict of bam and bai for tumor RNA-Seq.  It can be one of two formats
           rna_bam:   # Just the genomic bam and bai
                |- 'rna_genome_sorted.bam': fsID
                +- 'rna_genome_sorted.bam.bai': fsID
           OR
           rna_bam:   # The output from run_star
               |- 'rna_transcriptome.bam': fsID
               |- 'rna_genome':     # Only this part will be used
                       |- 'rna_genome_sorted.bam': fsID
                       +- 'rna_genome_sorted.bam.bai': fsID
    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict radia_options: Options specific to RADIA
    :return: Dict of results from running RADIA on every chromosome
             perchrom_radia:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: dict
    :raises RuntimeError: If rna_bam has no 'rna_genome' entry and its keys are not exactly
            'rna_genome_sorted.bam' and 'rna_genome_sorted.bam.bai'
    """
    if 'rna_genome' in rna_bam:
        # Output of run_star: only the genome-aligned part is used from here on.
        rna_bam = rna_bam['rna_genome']
    elif set(rna_bam) != {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:
        raise RuntimeError(
            'An improperly formatted dict was passed to rna_bam.')

    bams = {
        'tumor_rna': rna_bam['rna_genome_sorted.bam'],
        'tumor_rnai': rna_bam['rna_genome_sorted.bam.bai'],
        'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']
    }
    # Get a list of chromosomes to process
    if radia_options['chromosomes']:
        chromosomes = radia_options['chromosomes']
    else:
        chromosomes = sample_chromosomes(job, radia_options['genome_fai'])
    perchrom_radia = defaultdict()
    for chrom in chromosomes:
        radia = job.addChildJobFn(
            run_radia_perchrom,
            bams,
            univ_options,
            radia_options,
            chrom,
            memory='6G',
            disk=PromisedRequirement(
                radia_disk, tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                normal_bam['normal_dna_fix_pg_sorted.bam'],
                rna_bam['rna_genome_sorted.bam'],
                radia_options['genome_fasta']))
        # The filter job is a child of the calling job and consumes its promised output.
        filter_radia = radia.addChildJobFn(
            run_filter_radia,
            bams,
            radia.rv(),
            univ_options,
            radia_options,
            chrom,
            memory='6G',
            disk=PromisedRequirement(
                radia_disk, tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                normal_bam['normal_dna_fix_pg_sorted.bam'],
                rna_bam['rna_genome_sorted.bam'],
                radia_options['genome_fasta']))
        perchrom_radia[chrom] = filter_radia.rv()
    # NOTE(review): this message is emitted once the child jobs have been added in this
    # function; it does not by itself show that the RADIA jobs have finished.
    job.fileStore.logToMaster('Ran spawn_radia on %s successfully' %
                              univ_options['patient'])
    return perchrom_radia
Esempio n. 10
0
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    This module will add a single strelka job (run_strelka_full) on the DNA bams as a child of
    `job`.  If `split` is True, an encapsulated wrap_unmerge job is chained after it.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai.  Only this key is read here:
         tumor_bam
              +- 'tumor_dna_fix_pg_sorted.bam': <JSid>
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai.  Only this key is read here:
         normal_bam
              +- 'normal_dna_fix_pg_sorted.bam': <JSid>
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'max_cores': <upper bound on the number of cores requested for strelka>
    4. strelka_options: Dict of parameters specific to strelka.  Keys read here:
         strelka_options
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fasta index file>
    5. split: If True, the strelka result is passed on to wrap_unmerge

    RETURN VALUES
    1. The promised return value of the last job in the chain: the wrap_unmerge job when split
       is True, otherwise the strelka job.

    This module corresponds to node 11 on the tree
    """
    chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
    num_cores = min(len(chromosomes), univ_options['max_cores'])
    strelka_disk_req = PromisedRequirement(strelka_disk,
                                           tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                           normal_bam['normal_dna_fix_pg_sorted.bam'],
                                           strelka_options['genome_fasta'])
    strelka = job.wrapJobFn(run_strelka_full, tumor_bam, normal_bam, univ_options,
                            strelka_options, disk=strelka_disk_req, memory='6G',
                            cores=num_cores)
    job.addChild(strelka)
    # `last_job` is whichever job's result the caller should receive.
    last_job = strelka
    if split:
        last_job = job.wrapJobFn(wrap_unmerge, strelka.rv(), strelka_options,
                                 univ_options).encapsulate()
        strelka.addChild(last_job)
    return last_job.rv()
Esempio n. 11
0
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options,
              radia_options):
    """
    This module will add, for each chromosome returned by sample_chromosomes, a radia child job
    (run_radia_perchrom) followed by a filter job (run_filter_radia) on the RNA and DNA bams.

    ARGUMENTS
    1. rna_bam: Dict of input STAR bams
         rna_bam
              |- 'rnaAligned.sortedByCoord.out.bam': REFER run_star()
                                |- 'rna_fix_pg_sorted.bam': <JSid>
                                +- 'rna_fix_pg_sorted.bam.bai': <JSid>
    2. tumor_bam: Dict of input tumor WGS/WSQ bam + bai
         tumor_bam
              |- 'tumor_dna_fix_pg_sorted.bam': <JSid>
              +- 'tumor_dna_fix_pg_sorted.bam.bai': <JSid>
    3. normal_bam: Dict of input normal WGS/WSQ bam + bai
         normal_bam
              |- 'normal_dna_fix_pg_sorted.bam': <JSid>
              +- 'normal_dna_fix_pg_sorted.bam.bai': <JSid>
    4. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'patient': <value interpolated into the log message>
    5. radia_options: Dict of parameters specific to radia
         radia_options
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fai file>

    RETURN VALUES
    1. perchrom_radia: Dict with one entry per chromosome; each value is the promised return
       value of that chromosome's run_filter_radia job
         perchrom_radia
              |- 'chr1': <promise>
              |- 'chr2': <promise>
             etc...

    This module corresponds to node 11 on the tree
    """
    def _disk_requirement():
        # Build a fresh promised disk requirement; the radia and filter jobs each get their own.
        return PromisedRequirement(radia_disk,
                                   tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                   normal_bam['normal_dna_fix_pg_sorted.bam'],
                                   rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
                                   radia_options['genome_fasta'])

    job.fileStore.logToMaster('Running spawn_radia on %s' %
                              univ_options['patient'])
    rna_bam_key = 'rnaAligned.sortedByCoord.out.bam'
    bams = {
        'tumor_rna': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
        'tumor_rnai': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam.bai'],
        'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
    }
    perchrom_radia = defaultdict()
    # Get a list of chromosomes to process, then chain radia -> filter for each of them
    for chrom in sample_chromosomes(job, radia_options['genome_fai']):
        radia = job.addChildJobFn(run_radia_perchrom, bams, univ_options, radia_options, chrom,
                                  memory='6G', disk=_disk_requirement())
        filter_radia = radia.addChildJobFn(run_filter_radia, bams, radia.rv(), univ_options,
                                           radia_options, chrom, memory='6G',
                                           disk=_disk_requirement())
        perchrom_radia[chrom] = filter_radia.rv()
    return perchrom_radia
Esempio n. 12
0
def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_options, split=True):
    """
    This module will add the somaticsniper subgraph on the DNA bams under `job`.

    The jobs are wired as follows: run_somaticsniper_full and run_pileup are both children of
    `job`; filter_somaticsniper is a child of both of them and receives their promised return
    values; when `split` is True an unmerge job is added as a child of the filter job.

    ARGUMENTS
    1. tumor_bam: Dict of input tumor WGS/WSQ bam + bai.  Only this key is read here:
         tumor_bam
              +- 'tumor_dna_fix_pg_sorted.bam': <JSid>
    2. normal_bam: Dict of input normal WGS/WSQ bam + bai.  Only this key is read here:
         normal_bam
              +- 'normal_dna_fix_pg_sorted.bam': <JSid>
    3. univ_options: Dict of universal arguments used by almost all tools (passed through to the
       child jobs unchanged)
    4. somaticsniper_options: Dict of parameters specific to somaticsniper.  Keys read here:
         somaticsniper_options
              |- 'genome_fasta': <JSid for genome fasta file>
              +- 'genome_fai': <JSid for genome fasta index file>
    5. split: If True, the filtered result is passed on to unmerge

    RETURN VALUES
    1. The promised return value of the unmerge job when split is True, otherwise the promised
       return value of the filter_somaticsniper job.  The per-chromosome form is documented as
         perchrom_somaticsniper
              |- 'chr1'
              |   +- <JSid for somaticsniper_chr1.vcf>
              |- 'chr2'
              |   +- <JSid for somaticsniper_chr2.vcf>
             etc...

    This module corresponds to node 11 on the tree
    """
    # NOTE(review): the chromosome list is not used anywhere in this function (unmerge is not
    # passed it).  The call is kept because sample_chromosomes is defined elsewhere and any side
    # effects it may have cannot be ruled out from here -- confirm and drop it if it has none.
    sample_chromosomes(job, somaticsniper_options['genome_fai'])
    snipe = job.wrapJobFn(run_somaticsniper_full, tumor_bam, normal_bam, univ_options,
                          somaticsniper_options,
                          disk=PromisedRequirement(sniper_disk,
                                                   tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                   normal_bam['normal_dna_fix_pg_sorted.bam'],
                                                   somaticsniper_options['genome_fasta']),
                          memory='6G')
    pileup = job.wrapJobFn(run_pileup, tumor_bam, univ_options, somaticsniper_options,
                           disk=PromisedRequirement(pileup_disk,
                                                    tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                    somaticsniper_options['genome_fasta']),
                           memory='6G')
    # The filter step consumes the promised outputs of both the caller and the pileup.
    filtersnipes = job.wrapJobFn(filter_somaticsniper, tumor_bam, snipe.rv(), pileup.rv(),
                                 univ_options, somaticsniper_options,
                                 disk=PromisedRequirement(sniper_filter_disk,
                                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                          somaticsniper_options['genome_fasta']),
                                 memory='6G')

    job.addChild(snipe)
    job.addChild(pileup)
    # Attaching the filter job to both parents makes it wait for each of them.
    snipe.addChild(filtersnipes)
    pileup.addChild(filtersnipes)
    if split:
        unmerge_snipes = job.wrapJobFn(unmerge, filtersnipes.rv(), 'somaticsniper',
                                       somaticsniper_options, univ_options)
        filtersnipes.addChild(unmerge_snipes)
        return unmerge_snipes.rv()
    else:
        return filtersnipes.rv()