Ejemplo n.º 1
0
def buildCoverageStats(infile, outfile):
    '''Generate coverage statistics for regions of interest from a
    bed file using Picard.

    A temporary interval file is assembled from the header of `infile`
    (``samtools view -H``) followed by a reformatted copy of the baits
    file named by PARAMS["roi_baits"].  That temporary file is handed
    to PipelineMappingQC.buildPicardCoverageStats as both its third and
    its fourth argument and is zapped afterwards.

    infile: path of the alignment file; also used as the prefix for
        all temporary file names.
    outfile: passed through to
        PipelineMappingQC.buildPicardCoverageStats.
    '''

    # TS check whether this is always required or specific to current baits file

    # baits file requires modification to make picard accept it
    # this is performed before CalculateHsMetrics

    # NOTE(review): `to_cluster` is not referenced again in this
    # function; presumably P.run() reads it from this frame - confirm.
    to_cluster = USECLUSTER
    baits = PARAMS["roi_baits"]
    modified_baits = infile + "_temp_baits_final.bed"
    # NOTE(review): `regions` is never used below; `modified_baits` is
    # passed in its place - confirm this is intended.
    regions = PARAMS["roi_regions"]
    # Shell steps:
    #   1. write the header of `infile` to <infile>_temp_header.txt
    #   2. skip the first two lines of the baits file and print columns
    #      1-3, a literal "+", and column 4, tab-separated, to
    #      <infile>_temp_baits.bed
    #   3. concatenate header and reformatted baits into modified_baits
    #   4. remove the two intermediate files
    statement = '''samtools view -H %(infile)s > %(infile)s_temp_header.txt;
                awk 'NR>2' %(baits)s |
                awk -F '\\t' 'BEGIN { OFS="\\t" } {print $1,$2,$3,"+",$4;}'
                > %(infile)s_temp_baits.bed;
                cat  %(infile)s_temp_header.txt %(infile)s_temp_baits.bed
                > %(modified_baits)s; checkpoint ;
                rm -rf %(infile)s_temp_baits.bed %(infile)s_temp_header.txt
                '''
    # NOTE(review): called without arguments; presumably picks up
    # `statement` and the %(...)s values from the local variables of
    # this function - confirm against the P module before renaming
    # any local.
    P.run()

    PipelineMappingQC.buildPicardCoverageStats(
        infile, outfile, modified_baits, modified_baits)

    # temporary interval file is no longer needed
    # (exact semantics of zapFile: see IOTools)
    IOTools.zapFile(modified_baits)
Ejemplo n.º 2
0
def buildCoverageStats(infile, outfile):
    '''Generate coverage statistics for regions of interest from a
    bed file using Picard.

    The header of `infile` (``samtools view -H``) is prepended to a
    reformatted copy of the baits file named by PARAMS["roi_baits"].
    The resulting temporary file is given to
    PipelineMappingQC.buildPicardCoverageStats as both its third and
    its fourth argument, then zapped.

    infile: path of the alignment file; also the prefix of every
        temporary file name.
    outfile: forwarded to PipelineMappingQC.buildPicardCoverageStats.
    '''

    # TS check whether this is always required or specific to current baits
    # file

    # baits file requires modification to make picard accept it
    # this is performed before CalculateHsMetrics

    # NOTE(review): `to_cluster` is not used again here; presumably it
    # is read by P.run() from this frame - confirm.
    to_cluster = USECLUSTER
    baits = PARAMS["roi_baits"]
    modified_baits = infile + "_temp_baits_final.bed"
    # NOTE(review): `regions` is assigned but never used; the call
    # below passes `modified_baits` twice - confirm this is intended.
    regions = PARAMS["roi_regions"]
    # Shell steps:
    #   1. dump the header of `infile` to <infile>_temp_header.txt
    #   2. drop the first two lines of the baits file and emit columns
    #      1-3, a literal "+", and column 4 (tab-separated) into
    #      <infile>_temp_baits.bed
    #   3. cat header + reformatted baits into modified_baits
    #   4. delete the two intermediate files
    statement = '''samtools view -H %(infile)s > %(infile)s_temp_header.txt;
                awk 'NR>2' %(baits)s |
                awk -F '\\t' 'BEGIN { OFS="\\t" } {print $1,$2,$3,"+",$4;}'
                > %(infile)s_temp_baits.bed;
                cat  %(infile)s_temp_header.txt %(infile)s_temp_baits.bed
                > %(modified_baits)s; checkpoint ;
                rm -rf %(infile)s_temp_baits.bed %(infile)s_temp_header.txt
                '''
    # NOTE(review): no arguments are passed; presumably `statement`
    # and the %(...)s placeholders are resolved from this function's
    # local variables - confirm against the P module before renaming
    # any local.
    P.run()

    PipelineMappingQC.buildPicardCoverageStats(infile, outfile, modified_baits,
                                               modified_baits)

    # temporary interval file is no longer needed
    # (exact semantics of zapFile: see IOTools)
    IOTools.zapFile(modified_baits)
Ejemplo n.º 3
0
def buildPicardStats(infile, outfile):
    '''Build alignment statistics for `infile` using Picard.

    The reference FASTA is chosen from PARAMS["pool_reads"]: when it is
    truthy, "agg-agg-agg.filtered.contigs.fa" in the directory of
    `infile` is used; otherwise the name is P.snip(infile, ".bam")
    with ".fa" appended.

    Note that Picard reports "reads", but they are in fact alignments.
    '''
    if not PARAMS["pool_reads"]:
        reference_fasta = P.snip(infile, ".bam") + ".fa"
    else:
        bam_dir = os.path.dirname(infile)
        reference_fasta = os.path.join(
            bam_dir, "agg-agg-agg.filtered.contigs.fa")

    PipelineMappingQC.buildPicardAlignmentStats(
        infile, outfile, reference_fasta)
Ejemplo n.º 4
0
def runPicardOnRealigned(infile, outfile):
    '''Collect Picard alignment statistics for a control BAM file and
    for its matching tumour BAM file.

    The tumour input and output paths are derived from `infile` and
    `outfile` by replacing the literal "Control" with
    PARAMS["mutect_tumour"].  Both (input, output) pairs are passed to
    PipelineMappingQC.buildPicardAlignmentStats together with the
    genome FASTA "<bwa_index_dir>/<genome>.fa".

    infile: path of the control BAM file.
    outfile: path of the statistics output for the control sample.
    '''
    # NOTE(review): none of the following locals is referenced again in
    # this function.  They are kept because the right-hand sides are
    # calls whose side effects are not visible here - confirm before
    # removing.  TS: no multithreading is used, so no thread count is
    # requested.
    to_cluster = USECLUSTER
    job_options = getGATKOptions()
    tmpdir_gatk = P.getTempDir('/ifs/scratch')

    outfile_tumor = outfile.replace("Control", PARAMS["mutect_tumour"])
    infile_tumor = infile.replace("Control", PARAMS["mutect_tumour"])

    # NOTE(review): `track` and `track_tumor` are unused as well; kept
    # because P.snip may validate the ".bam" suffix - confirm.
    track = P.snip(os.path.basename(infile), ".bam")
    track_tumor = track.replace("Control", PARAMS["mutect_tumour"])

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"],
                           PARAMS["genome"])

    # The hand-written CollectMultipleMetrics shell statement that used
    # to follow was only formatted, never executed; the two calls below
    # do the work, so the dead statement has been dropped.
    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, genome)
    PipelineMappingQC.buildPicardAlignmentStats(infile_tumor,
                                                outfile_tumor, genome)
Ejemplo n.º 5
0
def runPicardOnRealigned(infile, outfile):
    '''Run Picard alignment statistics on a control BAM file and on the
    corresponding tumour BAM file.

    Tumour paths are obtained from `infile` and `outfile` by replacing
    PARAMS["sample_control"] with PARAMS["sample_tumour"].  Each
    (input, output) pair is passed to
    PipelineMappingQC.buildPicardAlignmentStats along with the genome
    FASTA "<bwa_index_dir>/<genome>.fa".
    '''
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    tmpdir_gatk = P.getTempDir()

    control_label = PARAMS["sample_control"]
    tumour_label = PARAMS["sample_tumour"]

    def as_tumour(name):
        # swap the control sample label for the tumour sample label
        return name.replace(control_label, tumour_label)

    outfile_tumor = as_tumour(outfile)
    infile_tumor = as_tumour(infile)

    track = P.snip(os.path.basename(infile), ".bam")
    track_tumor = as_tumour(track)

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    # control first, then tumour
    for bam, stats in ((infile, outfile), (infile_tumor, outfile_tumor)):
        PipelineMappingQC.buildPicardAlignmentStats(bam, stats, genome)
Ejemplo n.º 6
0
def runPicardOnRealigned(infile, outfile):
    '''Gather Picard alignment statistics for the control sample and
    its paired tumour sample.

    The tumour file names are the control file names with
    PARAMS["sample_control"] replaced by PARAMS["sample_tumour"].
    PipelineMappingQC.buildPicardAlignmentStats is called once per
    sample with the genome FASTA "<bwa_index_dir>/<genome>.fa".
    '''
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    tmpdir_gatk = P.getTempDir()

    control = PARAMS["sample_control"]
    tumour = PARAMS["sample_tumour"]

    # derive the tumour output/input paths from the control ones
    outfile_tumor, infile_tumor = (
        path.replace(control, tumour) for path in (outfile, infile))

    track = P.snip(os.path.basename(infile), ".bam")
    track_tumor = track.replace(control, tumour)

    index_dir = PARAMS["bwa_index_dir"]
    genome = "%s/%s.fa" % (index_dir, PARAMS["genome"])

    build_stats = PipelineMappingQC.buildPicardAlignmentStats
    build_stats(infile, outfile, genome)
    build_stats(infile_tumor, outfile_tumor, genome)
Ejemplo n.º 7
0
def buildBAMStats(infile, outfile):
    '''Count the number of reads mapped, duplicates, etc.

    Thin wrapper: the work is delegated to
    PipelineMappingQC.buildBAMStats with `infile` and `outfile`
    passed through unchanged.
    '''
    build_stats = PipelineMappingQC.buildBAMStats
    build_stats(infile, outfile)
Ejemplo n.º 8
0
def buildPicardGCStats(infile, outfile):
    '''Gather GC bias statistics for a BAM file using Picard.

    The genome FASTA passed to PipelineMappingQC.buildPicardGCStats is
    "<genome>.fa" inside PARAMS["bwa_index_dir"].
    '''
    genome_fasta = os.path.join(
        PARAMS["bwa_index_dir"], PARAMS["genome"] + ".fa")
    PipelineMappingQC.buildPicardGCStats(infile, outfile, genome_fasta)
Ejemplo n.º 9
0
def loadPicardAlignmentStats(infiles, outfile):
    '''Merge Picard alignment stats into a single table and load it
    into SQLite.

    Delegates to PipelineMappingQC.loadPicardAlignmentStats with the
    arguments passed through unchanged.
    '''
    load_stats = PipelineMappingQC.loadPicardAlignmentStats
    load_stats(infiles, outfile)
Ejemplo n.º 10
0
def buildPicardAlignmentStats(infile, outfile):
    '''Gather alignment statistics for a BAM file using Picard.

    The reference handed to PipelineMappingQC.buildPicardAlignmentStats
    is "<genome>.fa" inside PARAMS["bwa_index_dir"].
    '''
    index_dir = PARAMS["bwa_index_dir"]
    genome_fasta = os.path.join(index_dir, PARAMS["genome"] + ".fa")

    PipelineMappingQC.buildPicardAlignmentStats(
        infile, outfile, genome_fasta)
Ejemplo n.º 11
0
def loadPicardDuplicateStats(infiles, outfile):
    '''Merge Picard duplicate stats into a single table and load it
    into SQLite.

    Delegates to PipelineMappingQC.loadPicardDuplicateStats.
    '''
    load_stats = PipelineMappingQC.loadPicardDuplicateStats
    load_stats(infiles, outfile)
Ejemplo n.º 12
0
def loadCoverageStats(infiles, outfile):
    '''Load coverage statistics by delegating to
    PipelineMappingQC.loadPicardCoverageStats.

    `infiles` and `outfile` are passed through unchanged.
    '''
    load_stats = PipelineMappingQC.loadPicardCoverageStats
    load_stats(infiles, outfile)
Ejemplo n.º 13
0
def buildPicardAlignmentStats(infile, outfile):
    '''Gather BAM file alignment statistics using Picard.

    Calls PipelineMappingQC.buildPicardAlignmentStats with the genome
    FASTA "<genome>.fa" located in PARAMS["bwa_index_dir"].
    '''
    reference = os.path.join(PARAMS["bwa_index_dir"],
                             PARAMS["genome"] + ".fa")
    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, reference)
Ejemplo n.º 14
0
def loadBAMStats(infiles, outfile):
    '''Load BAM statistics into the bam_stats table.

    Delegates to PipelineMappingQC.loadBAMStats with the arguments
    passed through unchanged.
    '''
    load_stats = PipelineMappingQC.loadBAMStats
    load_stats(infiles, outfile)
Ejemplo n.º 15
0
def loadBAMStats(infiles, outfile):
    '''Import BAM statistics into SQLite.

    Thin wrapper around PipelineMappingQC.loadBAMStats.
    '''
    loader = PipelineMappingQC.loadBAMStats
    loader(infiles, outfile)
Ejemplo n.º 16
0
def loadPicardAlignStats(infiles, outfile):
    '''Merge Picard alignment stats into a single table and load it
    into SQLite.

    Delegates to PipelineMappingQC.loadPicardAlignmentStats.
    '''
    loader = PipelineMappingQC.loadPicardAlignmentStats
    loader(infiles, outfile)
Ejemplo n.º 17
0
def loadCoverageStats(infiles, outfile):
    '''Load coverage statistics.

    Thin wrapper: calls PipelineMappingQC.loadPicardCoverageStats with
    `infiles` and `outfile`.
    '''
    loader = PipelineMappingQC.loadPicardCoverageStats
    loader(infiles, outfile)
Ejemplo n.º 18
0
def buildPicardGCStats(infile, outfile):
    '''Gather BAM file GC bias statistics using Picard.

    The reference passed to PipelineMappingQC.buildPicardGCStats is
    "<genome>.fa" located in PARAMS["bwa_index_dir"].
    '''
    index_dir = PARAMS["bwa_index_dir"]
    reference = os.path.join(index_dir, PARAMS["genome"] + ".fa")

    PipelineMappingQC.buildPicardGCStats(infile, outfile, reference)
Ejemplo n.º 19
0
def loadBAMStats(infiles, outfile):
    '''Import BAM statistics into SQLite.

    `infiles` and `outfile` are handed unchanged to
    PipelineMappingQC.loadBAMStats.
    '''
    import_stats = PipelineMappingQC.loadBAMStats
    import_stats(infiles, outfile)
Ejemplo n.º 20
0
def buildBAMStats(infile, outfile):
    '''Count number of reads mapped, duplicates, etc.

    The counting itself is done by PipelineMappingQC.buildBAMStats,
    which receives `infile` and `outfile` unchanged.
    '''
    count_stats = PipelineMappingQC.buildBAMStats
    count_stats(infile, outfile)
Ejemplo n.º 21
0
def loadPicardDuplicateStats(infiles, outfile):
    '''Merge Picard duplicate stats into a single table and load it
    into SQLite.

    Delegates to PipelineMappingQC.loadPicardDuplicateStats, passing
    ".bed.gz" as `pipeline_suffix`.
    '''
    PipelineMappingQC.loadPicardDuplicateStats(
        infiles,
        outfile,
        pipeline_suffix=".bed.gz",
    )
Ejemplo n.º 22
0
def loadPicardStats(infiles, outfile):
    '''Merge alignment stats into single tables.

    Delegates to PipelineMappingQC.loadPicardAlignmentStats with the
    arguments passed through unchanged.
    '''
    merge_stats = PipelineMappingQC.loadPicardAlignmentStats
    merge_stats(infiles, outfile)