def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    Schedule one MuTect child job per chromosome on the tumor and normal DNA bams.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mutect_options: Options specific to MuTect
    :return: Dict holding, for every chromosome, the promised result of its per-chromosome job
             perchrom_mutect:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: dict
    """
    # A non-empty chromosome list in the options takes precedence over the genome index.
    chromosomes = (mutect_options['chromosomes'] or
                   sample_chromosomes(job, mutect_options['genome_fai']))
    perchrom_mutect = defaultdict()
    for chrom in chromosomes:
        # The disk requirement is a promise built from mutect_disk and the input files.
        disk_needed = PromisedRequirement(mutect_disk,
                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                          normal_bam['normal_dna_fix_pg_sorted.bam'],
                                          mutect_options['genome_fasta'],
                                          mutect_options['dbsnp_vcf'],
                                          mutect_options['cosmic_vcf'])
        chrom_job = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                      mutect_options, chrom, memory='6G', disk=disk_needed)
        perchrom_mutect[chrom] = chrom_job.rv()
    return perchrom_mutect
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    Schedule the strelka subgraph on the DNA bams and, if requested, a follow-on job that splits
    the results into per-chromosome vcfs.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict strelka_options: Options specific to strelka
    :param bool split: Should the results be split into perchrom vcfs?
    :return: The promised result of the genome-level strelka job when split is False, otherwise
             the promised result of the unmerge job, a dict keyed by chromosome
             perchrom_strelka:
                 |- 'chr1':
                 |      |-'snvs': fsID
                 |      +-'indels': fsID
                 |-...
                 |
                 +- 'chrM':
                        |-'snvs': fsID
                        +-'indels': fsID
    :rtype: toil.fileStore.FileID|dict
    """
    # A non-empty chromosome list in the options takes precedence over the genome index.
    chromosomes = (strelka_options['chromosomes'] or
                   sample_chromosomes(job, strelka_options['genome_fai']))
    # Request the smaller of the chromosome count and the configured core limit.
    num_cores = min(len(chromosomes), univ_options['max_cores'])
    disk_needed = PromisedRequirement(strelka_disk,
                                      tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                      normal_bam['normal_dna_fix_pg_sorted.bam'],
                                      strelka_options['genome_fasta'])
    strelka = job.wrapJobFn(run_strelka_full, tumor_bam, normal_bam, univ_options,
                            strelka_options, disk=disk_needed, memory='6G', cores=num_cores)
    job.addChild(strelka)
    if not split:
        return strelka.rv()
    # The splitter runs after strelka and consumes its promised output.
    unmerge_strelka = job.wrapJobFn(wrap_unmerge, strelka.rv(), chromosomes, strelka_options,
                                    univ_options).encapsulate()
    strelka.addChild(unmerge_strelka)
    return unmerge_strelka.rv()
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    Schedule a single strelka job on the DNA bams and, if requested, a follow-on job that
    unmerges its output.

    :param dict tumor_bam: Dict of tumor DNA bam + bai; 'tumor_dna_fix_pg_sorted.bam' is read here
    :param dict normal_bam: Dict of normal DNA bam + bai; 'normal_dna_fix_pg_sorted.bam' is read
           here
    :param dict univ_options: Dict of universal arguments used by almost all tools; 'max_cores' is
           read here
    :param dict strelka_options: Dict of parameters specific to strelka; 'genome_fai' and
           'genome_fasta' are read here
    :param bool split: If True, chain wrap_unmerge after strelka and return its promised result
    :return: The promised result of the strelka job (split=False) or of the encapsulated
             wrap_unmerge job (split=True)
    """
    chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
    # Request the smaller of the chromosome count and the configured core limit.
    num_cores = min(len(chromosomes), univ_options['max_cores'])
    disk_needed = PromisedRequirement(strelka_disk,
                                      tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                      normal_bam['normal_dna_fix_pg_sorted.bam'],
                                      strelka_options['genome_fasta'])
    strelka = job.wrapJobFn(run_strelka_full, tumor_bam, normal_bam, univ_options,
                            strelka_options, disk=disk_needed, memory='6G', cores=num_cores)
    job.addChild(strelka)
    if not split:
        return strelka.rv()
    # The unmerge step is a child of strelka and consumes its promised output.
    unmerge_strelka = job.wrapJobFn(wrap_unmerge, strelka.rv(), strelka_options,
                                    univ_options).encapsulate()
    strelka.addChild(unmerge_strelka)
    return unmerge_strelka.rv()
def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_options, split=True):
    """
    Run the SomaticSniper subgraph on the DNA bams.  Optionally split the results into
    per-chromosome vcfs.

    The subgraph is: SomaticSniper and a tumor pileup run as children of this job, a filtering
    job runs after both of them, and (if split is True) an unmerge job runs after the filter.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict somaticsniper_options: Options specific to SomaticSniper
    :param bool split: Should the results be split into perchrom vcfs?
    :return: Either the fsID to the genome-level vcf or a dict of results from running
             SomaticSniper on every chromosome
             perchrom_somaticsniper:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: toil.fileStore.FileID|dict
    """
    # Get a list of chromosomes to handle
    if somaticsniper_options['chromosomes']:
        chromosomes = somaticsniper_options['chromosomes']
    else:
        chromosomes = sample_chromosomes(job, somaticsniper_options['genome_fai'])
    snipe = job.wrapJobFn(run_somaticsniper_full, tumor_bam, normal_bam, univ_options,
                          somaticsniper_options,
                          disk=PromisedRequirement(sniper_disk,
                                                   tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                   normal_bam['normal_dna_fix_pg_sorted.bam'],
                                                   somaticsniper_options['genome_fasta']),
                          memory='6G')
    pileup = job.wrapJobFn(run_pileup, tumor_bam, univ_options, somaticsniper_options,
                           disk=PromisedRequirement(pileup_disk,
                                                    tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                    somaticsniper_options['genome_fasta']),
                           memory='6G')
    filtersnipes = job.wrapJobFn(filter_somaticsniper, tumor_bam, snipe.rv(), pileup.rv(),
                                 univ_options, somaticsniper_options,
                                 disk=PromisedRequirement(sniper_filter_disk,
                                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                          somaticsniper_options['genome_fasta']),
                                 memory='6G')
    # The filter consumes the output of both the caller and the pileup, so it is added as a
    # child of each of them.
    job.addChild(snipe)
    job.addChild(pileup)
    snipe.addChild(filtersnipes)
    pileup.addChild(filtersnipes)
    if split:
        unmerge_snipes = job.wrapJobFn(unmerge, filtersnipes.rv(), 'somaticsniper', chromosomes,
                                       somaticsniper_options, univ_options)
        filtersnipes.addChild(unmerge_snipes)
        return unmerge_snipes.rv()
    else:
        return filtersnipes.rv()
def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    Schedule one mutect child job per chromosome on the DNA bams.

    :param dict tumor_bam: Dict of tumor DNA bam + bai; 'tumor_dna_fix_pg_sorted.bam' is read here
    :param dict normal_bam: Dict of normal DNA bam + bai; 'normal_dna_fix_pg_sorted.bam' is read
           here
    :param dict univ_options: Dict of universal arguments used by almost all tools
    :param dict mutect_options: Dict of parameters specific to mutect; 'genome_fai',
           'genome_fasta', 'dbsnp_vcf' and 'cosmic_vcf' are read here
    :return: Dict mapping each chromosome to the promised result of its run_mutect_perchrom job
    :rtype: dict
    """
    # The chromosomes to process come from the genome fasta index.
    chromosomes = sample_chromosomes(job, mutect_options['genome_fai'])
    perchrom_mutect = defaultdict()
    for chrom in chromosomes:
        # The disk requirement is a promise built from mutect_disk and the input files.
        disk_needed = PromisedRequirement(mutect_disk,
                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                          normal_bam['normal_dna_fix_pg_sorted.bam'],
                                          mutect_options['genome_fasta'],
                                          mutect_options['dbsnp_vcf'],
                                          mutect_options['cosmic_vcf'])
        chrom_job = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                      mutect_options, chrom, memory='6G', disk=disk_needed)
        perchrom_mutect[chrom] = chrom_job.rv()
    return perchrom_mutect
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
    """
    Schedule a radia job followed by a filter job for each chromosome, on the RNA and DNA bams.

    :param dict rna_bam: Dict of STAR output; the nested dict under
           'rnaAligned.sortedByCoord.out.bam' must hold 'rna_fix_pg_sorted.bam' and its '.bai'
    :param dict tumor_bam: Dict holding 'tumor_dna_fix_pg_sorted.bam' and its '.bai'
    :param dict normal_bam: Dict holding 'normal_dna_fix_pg_sorted.bam' and its '.bai'
    :param dict univ_options: Dict of universal arguments used by almost all tools; 'patient' is
           read here for logging
    :param dict radia_options: Dict of parameters specific to radia; 'genome_fai' is read here
    :return: Dict mapping each chromosome to the promised result of its run_filter_radia job
    :rtype: dict
    """
    job.fileStore.logToMaster('Running spawn_radia on %s' % univ_options['patient'])
    rna_sorted = rna_bam['rnaAligned.sortedByCoord.out.bam']
    # Collect the six files the per-chromosome jobs need under short, uniform keys.
    bams = {
        'tumor_rna': rna_sorted['rna_fix_pg_sorted.bam'],
        'tumor_rnai': rna_sorted['rna_fix_pg_sorted.bam.bai'],
        'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
    }
    # The chromosomes to process come from the genome fasta index.
    chromosomes = sample_chromosomes(job, radia_options['genome_fai'])
    perchrom_radia = defaultdict()
    for chrom in chromosomes:
        caller = job.addChildJobFn(run_radia_perchrom, bams, univ_options, radia_options, chrom,
                                   disk='60G', memory='6G')
        # The filter is a child of the caller and consumes its promised output.
        filtered = caller.addChildJobFn(run_filter_radia, bams, caller.rv(), univ_options,
                                        radia_options, chrom, disk='60G', memory='6G')
        perchrom_radia[chrom] = filtered.rv()
    return perchrom_radia
def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
    """
    Schedule a muse call job followed by a muse sump job for each chromosome on the DNA bams.

    :param dict tumor_bam: Dict of tumor DNA bam + bai; 'tumor_dna_fix_pg_sorted.bam' is read here
    :param dict normal_bam: Dict of normal DNA bam + bai; 'normal_dna_fix_pg_sorted.bam' is read
           here
    :param dict univ_options: Dict of universal arguments used by almost all tools
    :param dict muse_options: Dict of parameters specific to muse; 'genome_fai', 'genome_fasta'
           and 'dbsnp_vcf' are read here
    :return: Dict mapping each chromosome to the promised result of its run_muse_sump_perchrom job
    :rtype: dict
    """
    # The chromosomes to process come from the genome fasta index.
    chromosomes = sample_chromosomes(job, muse_options['genome_fai'])
    perchrom_muse = defaultdict()
    for chrom in chromosomes:
        call_disk = PromisedRequirement(muse_disk,
                                        tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                        normal_bam['normal_dna_fix_pg_sorted.bam'],
                                        muse_options['genome_fasta'])
        call_job = job.addChildJobFn(run_muse_perchrom, tumor_bam, normal_bam, univ_options,
                                     muse_options, chrom, disk=call_disk, memory='6G')
        # The sump step is a child of the call step and consumes its promised output.
        sump_disk = PromisedRequirement(muse_sump_disk, muse_options['dbsnp_vcf'])
        sump_job = call_job.addChildJobFn(run_muse_sump_perchrom, call_job.rv(), univ_options,
                                          muse_options, chrom, disk=sump_disk, memory='6G')
        perchrom_muse[chrom] = sump_job.rv()
    return perchrom_muse
def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    """
    Schedule one mutect child job per chromosome on the DNA bams.

    Each child job is requested with a fixed 60G of disk and 6G of memory.

    :param dict tumor_bam: Dict of input tumor DNA bam + bai, passed through to the child jobs
    :param dict normal_bam: Dict of input normal DNA bam + bai, passed through to the child jobs
    :param dict univ_options: Dict of universal arguments used by almost all tools
    :param dict mutect_options: Dict of parameters specific to mutect; 'genome_fai' is read here
    :return: Dict mapping each chromosome to the promised result of its run_mutect_perchrom job
    :rtype: dict
    """
    # The chromosomes to process come from the genome fasta index.
    chromosomes = sample_chromosomes(job, mutect_options['genome_fai'])
    perchrom_mutect = defaultdict()
    for chrom in chromosomes:
        chrom_job = job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam, univ_options,
                                      mutect_options, chrom, disk='60G', memory='6G')
        perchrom_mutect[chrom] = chrom_job.rv()
    return perchrom_mutect
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
    """
    Schedule a RADIA job followed by a filter job for each chromosome on the input bam trio.

    :param dict rna_bam: Dict of bam and bai for tumor RNA-Seq.  It can be one of two formats
           rna_bam:    # Just the genomic bam and bai
                |- 'rna_genome_sorted.bam': fsID
                +- 'rna_genome_sorted.bam.bai': fsID
           OR
           rna_bam:    # The output from run_star
                |- 'rna_transcriptome.bam': fsID
                |- 'rna_genome':     # Only this part will be used
                        |- 'rna_genome_sorted.bam': fsID
                        +- 'rna_genome_sorted.bam.bai': fsID
    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict radia_options: Options specific to RADIA
    :return: Dict mapping each chromosome to the promised result of its filter job
             perchrom_radia:
                 |- 'chr1': fsID
                 |- 'chr2': fsID
                 |
                 |-...
                 |
                 +- 'chrM': fsID
    :rtype: dict
    :raises RuntimeError: If rna_bam has no 'rna_genome' entry and its keys are not exactly the
            genomic bam and bai
    """
    # Normalise rna_bam to the flat {bam, bai} form, rejecting anything else.
    if 'rna_genome' in rna_bam:
        rna_bam = rna_bam['rna_genome']
    elif set(rna_bam) != {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:
        raise RuntimeError('An improperly formatted dict was passed to rna_bam.')
    bams = {
        'tumor_rna': rna_bam['rna_genome_sorted.bam'],
        'tumor_rnai': rna_bam['rna_genome_sorted.bam.bai'],
        'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
    }
    # A non-empty chromosome list in the options takes precedence over the genome index.
    chromosomes = (radia_options['chromosomes'] or
                   sample_chromosomes(job, radia_options['genome_fai']))
    perchrom_radia = defaultdict()
    for chrom in chromosomes:
        # Both steps size their disk from the same inputs; each gets its own promise object.
        disk_inputs = (tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                       normal_bam['normal_dna_fix_pg_sorted.bam'],
                       rna_bam['rna_genome_sorted.bam'],
                       radia_options['genome_fasta'])
        caller = job.addChildJobFn(run_radia_perchrom, bams, univ_options, radia_options, chrom,
                                   memory='6G',
                                   disk=PromisedRequirement(radia_disk, *disk_inputs))
        # The filter is a child of the caller and consumes its promised output.
        filtered = caller.addChildJobFn(run_filter_radia, bams, caller.rv(), univ_options,
                                        radia_options, chrom, memory='6G',
                                        disk=PromisedRequirement(radia_disk, *disk_inputs))
        perchrom_radia[chrom] = filtered.rv()
    job.fileStore.logToMaster('Ran spawn_radia on %s successfully' % univ_options['patient'])
    return perchrom_radia
def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split=True):
    """
    Schedule a single strelka job on the DNA bams, optionally followed by an unmerge job.

    :param dict tumor_bam: Dict of tumor DNA bam + bai; 'tumor_dna_fix_pg_sorted.bam' is read here
    :param dict normal_bam: Dict of normal DNA bam + bai; 'normal_dna_fix_pg_sorted.bam' is read
           here
    :param dict univ_options: Dict of universal arguments used by almost all tools; 'max_cores' is
           read here
    :param dict strelka_options: Dict of parameters specific to strelka; 'genome_fai' and
           'genome_fasta' are read here
    :param bool split: If True, chain wrap_unmerge after strelka and return its promised result
    :return: The promised result of the last job scheduled: the strelka job itself when split is
             False, otherwise the encapsulated wrap_unmerge job
    """
    chromosomes = sample_chromosomes(job, strelka_options['genome_fai'])
    # Request the smaller of the chromosome count and the configured core limit.
    num_cores = min(len(chromosomes), univ_options['max_cores'])
    strelka = job.wrapJobFn(
        run_strelka_full, tumor_bam, normal_bam, univ_options, strelka_options,
        disk=PromisedRequirement(strelka_disk,
                                 tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                 normal_bam['normal_dna_fix_pg_sorted.bam'],
                                 strelka_options['genome_fasta']),
        memory='6G',
        cores=num_cores)
    job.addChild(strelka)
    final_job = strelka
    if split:
        # The unmerge step is a child of strelka and consumes its promised output.
        final_job = job.wrapJobFn(wrap_unmerge, strelka.rv(), strelka_options,
                                  univ_options).encapsulate()
        strelka.addChild(final_job)
    return final_job.rv()
def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
    """
    Schedule a radia job followed by a filter job for each chromosome, on the RNA and DNA bams.

    :param dict rna_bam: Dict of STAR output; the nested dict under
           'rnaAligned.sortedByCoord.out.bam' must hold 'rna_fix_pg_sorted.bam' and its '.bai'
    :param dict tumor_bam: Dict holding 'tumor_dna_fix_pg_sorted.bam' and its '.bai'
    :param dict normal_bam: Dict holding 'normal_dna_fix_pg_sorted.bam' and its '.bai'
    :param dict univ_options: Dict of universal arguments used by almost all tools; 'patient' is
           read here for logging
    :param dict radia_options: Dict of parameters specific to radia; 'genome_fai' and
           'genome_fasta' are read here
    :return: Dict mapping each chromosome to the promised result of its run_filter_radia job
    :rtype: dict
    """
    job.fileStore.logToMaster('Running spawn_radia on %s' % univ_options['patient'])
    rna_sorted = rna_bam['rnaAligned.sortedByCoord.out.bam']
    # Collect the six files the per-chromosome jobs need under short, uniform keys.
    bams = {
        'tumor_rna': rna_sorted['rna_fix_pg_sorted.bam'],
        'tumor_rnai': rna_sorted['rna_fix_pg_sorted.bam.bai'],
        'tumor_dna': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor_dnai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal_dna': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
    }
    # The chromosomes to process come from the genome fasta index.
    chromosomes = sample_chromosomes(job, radia_options['genome_fai'])
    perchrom_radia = defaultdict()
    for chrom in chromosomes:
        # Both steps size their disk from the same inputs; each gets its own promise object.
        disk_inputs = (tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                       normal_bam['normal_dna_fix_pg_sorted.bam'],
                       rna_sorted['rna_fix_pg_sorted.bam'],
                       radia_options['genome_fasta'])
        caller = job.addChildJobFn(run_radia_perchrom, bams, univ_options, radia_options, chrom,
                                   memory='6G',
                                   disk=PromisedRequirement(radia_disk, *disk_inputs))
        # The filter is a child of the caller and consumes its promised output.
        filtered = caller.addChildJobFn(run_filter_radia, bams, caller.rv(), univ_options,
                                        radia_options, chrom, memory='6G',
                                        disk=PromisedRequirement(radia_disk, *disk_inputs))
        perchrom_radia[chrom] = filtered.rv()
    return perchrom_radia
def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_options, split=True):
    """
    Schedule the somaticsniper subgraph on the DNA bams, optionally followed by an unmerge job.

    The subgraph is: somaticsniper and a tumor pileup run as children of this job, a filtering
    job runs after both of them, and (if split is True) an unmerge job runs after the filter.

    :param dict tumor_bam: Dict of tumor DNA bam + bai; 'tumor_dna_fix_pg_sorted.bam' is read here
    :param dict normal_bam: Dict of normal DNA bam + bai; 'normal_dna_fix_pg_sorted.bam' is read
           here
    :param dict univ_options: Dict of universal arguments used by almost all tools
    :param dict somaticsniper_options: Dict of parameters specific to somaticsniper; 'genome_fai'
           and 'genome_fasta' are read here
    :param bool split: If True, chain unmerge after the filter and return its promised result
    :return: The promised result of the filter job (split=False) or of the unmerge job
             (split=True)
    """
    # NOTE(review): the chromosome list is not used anywhere below.  The call is kept in case
    # sample_chromosomes has effects callers rely on -- confirm and drop if it does not.
    chromosomes = sample_chromosomes(job, somaticsniper_options['genome_fai'])
    snipe = job.wrapJobFn(run_somaticsniper_full, tumor_bam, normal_bam, univ_options,
                          somaticsniper_options,
                          disk=PromisedRequirement(sniper_disk,
                                                   tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                   normal_bam['normal_dna_fix_pg_sorted.bam'],
                                                   somaticsniper_options['genome_fasta']),
                          memory='6G')
    pileup = job.wrapJobFn(run_pileup, tumor_bam, univ_options, somaticsniper_options,
                           disk=PromisedRequirement(pileup_disk,
                                                    tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                    somaticsniper_options['genome_fasta']),
                           memory='6G')
    filtersnipes = job.wrapJobFn(filter_somaticsniper, tumor_bam, snipe.rv(), pileup.rv(),
                                 univ_options, somaticsniper_options,
                                 disk=PromisedRequirement(sniper_filter_disk,
                                                          tumor_bam['tumor_dna_fix_pg_sorted.bam'],
                                                          somaticsniper_options['genome_fasta']),
                                 memory='6G')
    # The filter consumes the output of both the caller and the pileup, so it is added as a
    # child of each of them.
    job.addChild(snipe)
    job.addChild(pileup)
    snipe.addChild(filtersnipes)
    pileup.addChild(filtersnipes)
    if split:
        unmerge_snipes = job.wrapJobFn(unmerge, filtersnipes.rv(), 'somaticsniper',
                                       somaticsniper_options, univ_options)
        filtersnipes.addChild(unmerge_snipes)
        return unmerge_snipes.rv()
    else:
        return filtersnipes.rv()