Beispiel #1
0
def GATKpreprocessing(infile, outfile):
    '''Add read groups, realign around indels and recalibrate base qualities.

    Runs three PipelineExome steps in sequence, starting from ``infile``:
    ``GATKReadGroups`` -> ``GATKIndelRealign`` -> ``GATKBaseRecal``.

    The two intermediate files are named by replacing ``.bqsr`` in
    ``outfile`` with ``.readgroups.bqsr`` and ``.realign.bqsr``. Each
    intermediate is handed to ``IOTools.zapFile`` as soon as the following
    step has consumed it.

    infile : path of the input BAM file.
    outfile : path of the final output. If it does not contain ``.bqsr``
        both intermediate names collapse onto ``outfile`` itself.
    '''

    # NOTE(review): neither of these two names is read in this function.
    # They are kept in case the pipeline framework picks job options up
    # from the caller's locals -- confirm, and remove if it does not.
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    readgroups_out = outfile.replace(".bqsr", ".readgroups.bqsr")
    realigned_out = outfile.replace(".bqsr", ".realign.bqsr")

    PipelineExome.GATKReadGroups(infile, readgroups_out, genome,
                                 PARAMS["readgroup_library"],
                                 PARAMS["readgroup_platform"],
                                 PARAMS["readgroup_platform_unit"])

    PipelineExome.GATKIndelRealign(readgroups_out, realigned_out, genome,
                                   PARAMS["gatk_threads"])

    # The read-group file is no longer needed once realignment has run.
    IOTools.zapFile(readgroups_out)

    PipelineExome.GATKBaseRecal(realigned_out, outfile, genome,
                                PARAMS["gatk_dbsnp"],
                                PARAMS["gatk_solid_options"])

    # Likewise for the realigned file after recalibration.
    IOTools.zapFile(realigned_out)
def runMutectReverse(infiles, outfile):
    '''Run MuTect with the control and tumour samples swapped.

    Uses the control as tumour and vice versa to estimate the false
    positive rate.

    infiles : pair ``(infile, normal_panel)``. The tumour path is derived
        from ``infile`` by replacing ``PARAMS["sample_control"]`` with
        ``PARAMS["sample_tumour"]``.
    outfile : output VCF path; the call-stats and log file names are built
        from it after ``P.snip`` removes ``.mutect.reverse.snp.vcf``.
    '''
    infile, normal_panel = infiles
    infile_tumour = infile.replace(
        PARAMS["sample_control"], PARAMS["sample_tumour"])

    basename = P.snip(outfile, ".mutect.reverse.snp.vcf")
    call_stats_out = basename + "_call_stats.reverse.out"
    mutect_log = basename + ".reverse.log"

    (cosmic, dbsnp, quality, max_alt_qual, max_alt,
     max_fraction, tumor_LOD) = (
         PARAMS["mutect_cosmic"], PARAMS["gatk_dbsnp"],
         PARAMS["mutect_quality"], PARAMS["mutect_max_alt_qual"],
         PARAMS["mutect_max_alt"], PARAMS["mutect_max_fraction"],
         PARAMS["mutect_LOD"])

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"],
                           PARAMS["genome"])

    # The control-derived path goes first and the tumour-derived path last.
    # Presumably that is the reverse of a regular tumour/control call --
    # verify against the signature of mutectSNPCaller.
    PipelineExome.mutectSNPCaller(infile, outfile, mutect_log, genome,
                                  cosmic, dbsnp, call_stats_out,
                                  PARAMS['mutect_memory'],
                                  PARAMS['mutect_threads'],
                                  quality, max_alt_qual,
                                  max_alt, max_fraction, tumor_LOD,
                                  normal_panel, infile_tumour)
Beispiel #3
0
def defineEBioStudies(outfile):
    '''Identify the relevant eBio studies for the configured cancer types.

    The cancer types are read from
    ``PARAMS["annotation_ebio_cancer_types"]`` (set in pipeline.ini) and
    handed to ``PipelineExome.defineEBioStudies`` together with
    ``outfile``. The call is made with ``submit=False``.
    '''
    PipelineExome.defineEBioStudies(
        PARAMS["annotation_ebio_cancer_types"], outfile, submit=False)
def runMutectOnDownsampled(infiles, outfile):
    '''Call somatic SNPs using MuTect on downsampled BAM files.

    infiles : pair ``(infile, normal_panel)``. The tumour path is obtained
        from ``infile`` by replacing ``PARAMS["sample_control"]`` with
        ``PARAMS["sample_tumour"]``.
    outfile : output VCF path; the call-stats and log file names are built
        from it after ``P.snip`` removes ``_normal_mutect.vcf``.
    '''
    control_bam, normal_panel = infiles
    tumour_bam = control_bam.replace(
        PARAMS["sample_control"], PARAMS["sample_tumour"])

    prefix = P.snip(outfile, "_normal_mutect.vcf")
    stats_path = prefix + "_call_stats.out"
    log_path = prefix + ".log"

    # MuTect settings, all taken from the pipeline configuration.
    cosmic = PARAMS["mutect_cosmic"]
    dbsnp = PARAMS["gatk_dbsnp"]
    quality = PARAMS["mutect_quality"]
    max_alt_qual = PARAMS["mutect_max_alt_qual"]
    max_alt = PARAMS["mutect_max_alt"]
    max_fraction = PARAMS["mutect_max_fraction"]
    tumor_LOD = PARAMS["mutect_LOD"]

    genome = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.mutectSNPCaller(
        tumour_bam, outfile, log_path, genome, cosmic, dbsnp, stats_path,
        PARAMS['mutect_memory'], PARAMS['mutect_threads'],
        quality, max_alt_qual, max_alt, max_fraction, tumor_LOD,
        normal_panel, control_bam)
def defineEBioStudies(outfile):
    '''Look up the eBio studies matching the configured cancer types.

    Reads ``PARAMS["annotation_ebio_cancer_types"]`` (specified in
    pipeline.ini) and passes it, with ``outfile``, to
    ``PipelineExome.defineEBioStudies`` using ``submit=False``.
    '''
    configured_types = PARAMS["annotation_ebio_cancer_types"]
    PipelineExome.defineEBioStudies(
        configured_types,
        outfile,
        submit=False)
Beispiel #6
0
def runMutectReverse(infiles, outfile):
    '''Run MuTect with the control and tumour samples swapped.

    Uses the control as tumour and vice versa to estimate the false
    positive rate.

    infiles : pair ``(infile, normal_panel)``. The tumour path is derived
        from ``infile`` by replacing ``PARAMS["sample_control"]`` with
        ``PARAMS["sample_tumour"]``.
    outfile : output VCF path; the call-stats and log file names are built
        from it after ``P.snip`` removes ``.mutect.reverse.snp.vcf``.
    '''
    infile, normal_panel = infiles
    infile_tumour = infile.replace(PARAMS["sample_control"],
                                   PARAMS["sample_tumour"])

    basename = P.snip(outfile, ".mutect.reverse.snp.vcf")
    call_stats_out = basename + "_call_stats.reverse.out"
    mutect_log = basename + ".reverse.log"

    (cosmic, dbsnp, quality, max_alt_qual, max_alt, max_fraction,
     tumor_LOD) = (PARAMS["mutect_cosmic"], PARAMS["gatk_dbsnp"],
                   PARAMS["mutect_quality"], PARAMS["mutect_max_alt_qual"],
                   PARAMS["mutect_max_alt"], PARAMS["mutect_max_fraction"],
                   PARAMS["mutect_LOD"])

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    # The control-derived path goes first and the tumour-derived path last.
    # Presumably that is the reverse of a regular tumour/control call --
    # verify against the signature of mutectSNPCaller.
    PipelineExome.mutectSNPCaller(infile, outfile, mutect_log, genome, cosmic,
                                  dbsnp, call_stats_out,
                                  PARAMS['mutect_memory'],
                                  PARAMS['mutect_threads'], quality,
                                  max_alt_qual, max_alt, max_fraction,
                                  tumor_LOD, normal_panel, infile_tumour)
Beispiel #7
0
def extractEBioinfo(infiles, outfile):
    '''Find the number of mutations identified in previous studies
    (ebio_ids) for the mutated genes in the annotated vcfs.

    infiles : sequence whose first element is the eBio ids input and whose
        remaining elements are the vcfs.
    outfile : passed through to ``PipelineExome.extractEBioinfo``, which is
        called with ``submit=False``.
    '''
    PipelineExome.extractEBioinfo(
        infiles[0], infiles[1:], outfile, submit=False)
def extractEBioinfo(infiles, outfile):
    '''Count mutations reported in previous studies (ebio_ids) for the
    mutated genes in the annotated vcfs.

    The first entry of ``infiles`` is the eBio ids input; every later
    entry is a vcf. Both are forwarded, with ``outfile``, to
    ``PipelineExome.extractEBioinfo`` using ``submit=False``.
    '''
    study_ids = infiles[0]
    annotated_vcfs = infiles[1:]

    PipelineExome.extractEBioinfo(
        study_ids,
        annotated_vcfs,
        outfile,
        submit=False)
def mutationalSignature(infiles, outfiles):
    '''Compile the mutational signature via PipelineExome.

    Reads the four ``filter_*`` thresholds and ``mutect_tumour`` from
    ``PARAMS`` and forwards them, with ``infiles`` and ``outfiles``, to
    ``PipelineExome.compileMutationalSignature`` using ``submit=True``.
    '''
    tumour_alt_min = PARAMS["filter_minimum_tumor_allele"]
    tumour_alt_freq_min = PARAMS["filter_minimum_tumor_allele_frequency"]
    normal_depth_min = PARAMS["filter_minimum_normal_depth"]
    normal_alt_freq_max = PARAMS["filter_maximum_normal_allele_frequency"]
    tumour_name = PARAMS["mutect_tumour"]

    # Note the positional order expected by the helper: the tumour allele
    # frequency threshold comes after the two normal-sample thresholds.
    PipelineExome.compileMutationalSignature(
        infiles,
        outfiles,
        tumour_alt_min,
        normal_depth_min,
        normal_alt_freq_max,
        tumour_alt_freq_min,
        tumour_name,
        submit=True)
def indelCaller(infile, outfile):
    '''Call somatic indels using Strelka.

    infile : input path; the tumour path is derived from it by replacing
        the literal ``Control`` with ``PARAMS["mutect_tumour"]``.
    outfile : output path; its first two ``/``-separated components form
        the output directory handed to Strelka.
    '''
    tumour_bam = infile.replace("Control", PARAMS["mutect_tumour"])

    path_parts = outfile.split("/")
    outdir = "/".join(path_parts[:2])

    genome = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.strelkaINDELCaller(
        infile, tumour_bam, outfile, genome, "config.ini", outdir,
        PARAMS['strelka_memory'], PARAMS['strelka_threads'])
Beispiel #11
0
def indelCaller(infile, outfile):
    '''Call somatic indels using Strelka.

    infile : input path; the tumour path is derived from it by replacing
        ``PARAMS["sample_control"]`` with ``PARAMS["sample_tumour"]``.
    outfile : output path; its first two ``/``-separated components form
        the output directory handed to Strelka.
    '''
    tumour_bam = infile.replace(
        PARAMS["sample_control"], PARAMS["sample_tumour"])

    leading_parts = outfile.split("/")[:2]
    outdir = "/".join(leading_parts)

    genome = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.strelkaINDELCaller(
        infile,
        tumour_bam,
        outfile,
        genome,
        PARAMS['strelka_config'],
        outdir,
        PARAMS['strelka_memory'],
        PARAMS['strelka_threads'])
Beispiel #12
0
def filterMutect(infile, outfile):
    '''Filter MuTect SNPs using allele frequencies.

    The log file name is ``outfile`` with ``.vcf`` replaced by ``.log``.
    The five ``filter_*`` thresholds and the control/tumour sample names
    are read from ``PARAMS`` and forwarded to
    ``PipelineExome.filterMutect``.
    '''
    log_path = outfile.replace(".vcf", ".log")

    tumour_alt_min = PARAMS["filter_minimum_tumor_allele"]
    tumour_alt_freq_min = PARAMS["filter_minimum_tumor_allele_frequency"]
    normal_depth_min = PARAMS["filter_minimum_normal_depth"]
    normal_alt_freq_max = PARAMS["filter_maximum_normal_allele_frequency"]
    ratio_min = PARAMS["filter_minimum_ratio"]

    control_name = PARAMS["sample_control"]
    tumour_name = PARAMS["sample_tumour"]

    # Positional order matters: the tumour allele frequency threshold
    # follows the two normal-sample thresholds.
    PipelineExome.filterMutect(
        infile, outfile, log_path,
        control_name, tumour_name,
        tumour_alt_min, normal_depth_min, normal_alt_freq_max,
        tumour_alt_freq_min, ratio_min)
def filterMutect(infile, outfile):
    '''Apply the allele-frequency filters to MuTect SNP calls.

    Thresholds (``filter_*``) and the control/tumour sample names come
    from ``PARAMS``. The log is written next to ``outfile``, with ``.vcf``
    replaced by ``.log`` in its name.
    '''
    logfile = outfile.replace(".vcf", ".log")

    t_alt_min = PARAMS["filter_minimum_tumor_allele"]
    t_alt_freq_min = PARAMS["filter_minimum_tumor_allele_frequency"]
    n_depth_min = PARAMS["filter_minimum_normal_depth"]
    n_alt_freq_max = PARAMS["filter_maximum_normal_allele_frequency"]
    ratio_min = PARAMS["filter_minimum_ratio"]

    # Thresholds in the positional order the helper is called with.
    thresholds = (t_alt_min, n_depth_min, n_alt_freq_max,
                  t_alt_freq_min, ratio_min)

    PipelineExome.filterMutect(
        infile, outfile, logfile,
        PARAMS["sample_control"], PARAMS["sample_tumour"],
        *thresholds)
def callControlVariants(infile, outfile):
    '''Run MuTect to call SNPs in the control sample.

    The call-stats and log file names are built from ``outfile`` after
    ``P.snip`` removes ``_normal_mutect.vcf``. The caller is invoked with
    ``artifact=True``.
    '''
    prefix = P.snip(outfile, "_normal_mutect.vcf")
    stats_path = prefix + "_call_stats.out"
    log_path = prefix + ".log"

    cosmic = PARAMS["mutect_cosmic"]
    dbsnp = PARAMS["gatk_dbsnp"]

    genome = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.mutectSNPCaller(
        infile, outfile, log_path, genome, cosmic, dbsnp, stats_path,
        PARAMS['mutect_memory'], PARAMS['mutect_threads'],
        artifact=True)
Beispiel #15
0
def realignMatchedSample(infile, outfile):
    '''Repeat the indel realignment on the merged control + tumour BAM.

    This should help avoid problems with sample-specific realignments.
    After ``PipelineExome.GATKIndelRealign`` returns, ``infile`` is passed
    to ``IOTools.zapFile``.
    '''
    reference = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])
    PipelineExome.GATKIndelRealign(infile, outfile, reference)
    IOTools.zapFile(infile)
Beispiel #16
0
def callControlVariants(infile, outfile):
    '''Call SNPs in the control sample with MuTect.

    ``P.snip`` removes ``_normal_mutect.vcf`` from ``outfile``; the
    call-stats and log file names are derived from what remains.
    ``PipelineExome.mutectSNPCaller`` is invoked with ``artifact=True``.
    '''
    stem = P.snip(outfile, "_normal_mutect.vcf")
    call_stats_path = stem + "_call_stats.out"
    log_path = stem + ".log"

    cosmic = PARAMS["mutect_cosmic"]
    dbsnp = PARAMS["gatk_dbsnp"]

    reference = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.mutectSNPCaller(
        infile, outfile, log_path, reference,
        cosmic, dbsnp, call_stats_path,
        PARAMS['mutect_memory'], PARAMS['mutect_threads'],
        artifact=True)
Beispiel #17
0
def runMutectOnDownsampled(infiles, outfile):
    '''Call somatic SNPs using MuTect on downsampled BAM files.

    infiles : pair ``(infile, normal_panel)``. The tumour path is obtained
        from ``infile`` by replacing ``PARAMS["sample_control"]`` with
        ``PARAMS["sample_tumour"]``.
    outfile : output VCF path; the call-stats and log file names are built
        from it after ``P.snip`` removes ``_normal_mutect.vcf``.
    '''
    control_bam, normal_panel = infiles
    tumour_bam = control_bam.replace(
        PARAMS["sample_control"], PARAMS["sample_tumour"])

    stem = P.snip(outfile, "_normal_mutect.vcf")
    call_stats_path = stem + "_call_stats.out"
    log_path = stem + ".log"

    # MuTect settings, all taken from the pipeline configuration.
    cosmic = PARAMS["mutect_cosmic"]
    dbsnp = PARAMS["gatk_dbsnp"]
    quality = PARAMS["mutect_quality"]
    max_alt_qual = PARAMS["mutect_max_alt_qual"]
    max_alt = PARAMS["mutect_max_alt"]
    max_fraction = PARAMS["mutect_max_fraction"]
    tumor_LOD = PARAMS["mutect_LOD"]

    reference = "{}/{}.fa".format(PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineExome.mutectSNPCaller(
        tumour_bam, outfile, log_path, reference,
        cosmic, dbsnp, call_stats_path,
        PARAMS['mutect_memory'], PARAMS['mutect_threads'],
        quality, max_alt_qual, max_alt, max_fraction, tumor_LOD,
        normal_panel, control_bam)
def intersectHeatmap(infiles, outfile):
    '''Produce the intersection heatmap for ``infiles``.

    Thin wrapper: both arguments are passed unchanged to
    ``PipelineExome.intersectionHeatmap``.

    NOTE(review): the previous docstring spoke of intersecting "DE
    test_ids across the different quantifiers"; confirm that this still
    describes what the helper does in this pipeline.
    '''
    PipelineExome.intersectionHeatmap(
        infiles,
        outfile)
def mutationalSignature(infiles, outfiles):
    '''Compile the mutational signature via PipelineExome.

    Both arguments are passed unchanged to
    ``PipelineExome.compileMutationalSignature``.
    '''
    PipelineExome.compileMutationalSignature(infiles, outfiles)
def summariseFiltering(infile, outfile):
    '''Parse the MuTect call-stats file that belongs to ``infile``.

    The call-stats path is ``infile`` with ``.mutect.snp.vcf`` replaced by
    ``_call_stats.out``. It is handed, with ``outfile``, to
    ``PipelineExome.parseMutectCallStats`` using ``submit=True``.
    '''
    call_stats_path = infile.replace(".mutect.snp.vcf", "_call_stats.out")
    PipelineExome.parseMutectCallStats(
        call_stats_path, outfile, submit=True)
Beispiel #21
0
def mutationalSignature(infiles, outfiles):
    '''Delegate to ``PipelineExome.compileMutationalSignature``.

    ``infiles`` and ``outfiles`` are forwarded as given; no other
    arguments are supplied.
    '''
    PipelineExome.compileMutationalSignature(
        infiles,
        outfiles)
Beispiel #22
0
def intersectHeatmap(infiles, outfile):
    '''Delegate to ``PipelineExome.intersectionHeatmap``.

    ``infiles`` and ``outfile`` are forwarded as given.

    NOTE(review): the earlier docstring described this as intersecting
    "DE test_ids across the different quantifiers" -- verify that wording
    against the helper before relying on it.
    '''
    PipelineExome.intersectionHeatmap(
        infiles, outfile)
Beispiel #23
0
def summariseFiltering(infile, outfile):
    '''Summarise MuTect filtering from the matching call-stats file.

    ``.mutect.snp.vcf`` in ``infile`` is replaced by ``_call_stats.out``
    to locate the call-stats file, which is then passed with ``outfile``
    to ``PipelineExome.parseMutectCallStats`` (``submit=True``).
    '''
    stats_file = infile.replace(".mutect.snp.vcf", "_call_stats.out")

    PipelineExome.parseMutectCallStats(
        stats_file,
        outfile,
        submit=True)