コード例 #1
0
def strandSpecificity(infile, outfile):
    '''Determine the strand specificity of a library from its bam file.

    Delegates to PipelineBamStats.getStrandSpecificity, passing `infile`,
    `outfile` and a fixed iteration count of "1000000" (as a string).
    '''
    PipelineBamStats.getStrandSpecificity(infile, outfile, "1000000")
コード例 #2
0
def strandSpecificity(infile, outfile):
    '''Determine the strand specificity of a library from its bam file.

    The work is done by PipelineBamStats.getStrandSpecificity, which
    receives `infile`, `outfile` and the iteration count below.
    '''
    # The iteration count is handed over as a string, not an int.
    n_iterations = "1000000"
    PipelineBamStats.getStrandSpecificity(infile, outfile, n_iterations)
コード例 #3
0
def processGenomicContext(infile, outfile):
    '''Process a genomic context file.

    Hands `infile` and `outfile` to PipelineBamStats.defineBedFeatures.
    Each feature of the context file is assigned to a specific category,
    which helps to understand the hierarchical classification of
    features.
    '''
    define_features = PipelineBamStats.defineBedFeatures
    define_features(infile, outfile)
コード例 #4
0
def processGenomicContext(infile, outfile):
    '''Process a genomic context file.

    Every feature of the context file is assigned to a specific
    category, giving a hierarchical classification of the features.
    The work is delegated to PipelineBamStats.defineBedFeatures.
    '''
    PipelineBamStats.defineBedFeatures(
        infile,
        outfile)
コード例 #5
0
def intBam(infile, outfile):
    '''Make the intermediate bam file used by downstream tasks.

    When PARAMS["bam_sequence_stripped"] is exactly True the file is
    produced by PipelineBamStats.addPseudoSequenceQuality; in every other
    case PipelineBamStats.copyBamFile is used (described as a softlink in
    the original notes). Picard tools has an issue when quality score
    information is missing.
    '''
    # Identity comparison on purpose: only the literal True selects the
    # pseudo-quality path.
    if PARAMS["bam_sequence_stripped"] is True:
        make_intermediate = PipelineBamStats.addPseudoSequenceQuality
    else:
        make_intermediate = PipelineBamStats.copyBamFile

    make_intermediate(infile, outfile)
コード例 #6
0
def buildPicardStats(infiles, outfile):
    '''Build Picard alignment stats.

    `infiles` is unpacked into the input file and the reference file;
    both are forwarded, with `outfile`, to
    PipelineBamStats.buildPicardAlignmentStats.
    '''
    bamfile, genome_reference = infiles

    # Patch for mapping against the transcriptome: use the transcriptomic
    # sequences instead of the genomic reference.
    reference = ("refcoding.fa" if "transcriptome.dir" in bamfile
                 else genome_reference)

    PipelineBamStats.buildPicardAlignmentStats(bamfile, outfile, reference)
コード例 #7
0
def buildPicardRnaSeqMetrics(infiles, outfile):
    '''Build Picard RnaSeqMetrics for the given input files.

    PARAMS["strandness"] is translated into the strand value handed to
    PipelineBamStats.buildPicardRnaSeqMetrics:

    * "RF" or "R"   -> "SECOND_READ_TRANSCRIPTION_STRAND"
    * "FR" or "F"   -> "FIRST_READ_TRANSCRIPTION_STRAND"
    * anything else -> "NONE"

    Parameters
    ----------
    infiles :
        Forwarded unchanged to PipelineBamStats.buildPicardRnaSeqMetrics.
    outfile : str
        Forwarded unchanged to PipelineBamStats.buildPicardRnaSeqMetrics.
    '''
    strandness = PARAMS["strandness"]

    # Use membership tests: `x == ("RF" or "R")` only ever compares against
    # "RF" because `or` returns its first truthy operand, which made the
    # single-end codes "R" and "F" silently fall through to "NONE".
    if strandness in ("RF", "R"):
        strand = "SECOND_READ_TRANSCRIPTION_STRAND"
    elif strandness in ("FR", "F"):
        strand = "FIRST_READ_TRANSCRIPTION_STRAND"
    else:
        strand = "NONE"
    PipelineBamStats.buildPicardRnaSeqMetrics(infiles, strand, outfile)
コード例 #8
0
def buildPicardRnaSeqMetrics(infiles, outfile):
    '''Build Picard RnaSeqMetrics for the given input files.

    The library strandness configured in PARAMS["strandness"] selects
    the strand value passed to PipelineBamStats.buildPicardRnaSeqMetrics:

    * "RF" or "R"   -> "SECOND_READ_TRANSCRIPTION_STRAND"
    * "FR" or "F"   -> "FIRST_READ_TRANSCRIPTION_STRAND"
    * anything else -> "NONE"

    Parameters
    ----------
    infiles :
        Forwarded unchanged to PipelineBamStats.buildPicardRnaSeqMetrics.
    outfile : str
        Forwarded unchanged to PipelineBamStats.buildPicardRnaSeqMetrics.
    '''
    strandness = PARAMS["strandness"]

    # The previous `== ("RF" or "R")` / `== ("FR" or "F")` comparisons
    # reduced to `== "RF"` / `== "FR"` (`or` yields its first truthy
    # operand), so "R" and "F" were never recognised. Test membership
    # instead.
    if strandness in ("RF", "R"):
        strand = "SECOND_READ_TRANSCRIPTION_STRAND"
    elif strandness in ("FR", "F"):
        strand = "FIRST_READ_TRANSCRIPTION_STRAND"
    else:
        strand = "NONE"
    PipelineBamStats.buildPicardRnaSeqMetrics(infiles, strand, outfile)
コード例 #9
0
def intBam(infile, outfile):
    '''Make the intermediate bam file used by downstream tasks.

    If PARAMS["bam_sequence_stripped"] is exactly True, pseudo sequence
    qualities are added via PipelineBamStats.addPseudoSequenceQuality;
    otherwise PipelineBamStats.copyBamFile is called (described as a
    softlink in the original notes). Picard tools has an issue when
    quality score information is missing.
    '''
    sequence_stripped = PARAMS["bam_sequence_stripped"] is True

    if not sequence_stripped:
        PipelineBamStats.copyBamFile(infile, outfile)
    else:
        PipelineBamStats.addPseudoSequenceQuality(infile, outfile)
コード例 #10
0
def buildPicardStats(infiles, outfile):
    '''Build Picard alignment stats.

    `infiles` holds the input file and the reference file, in that
    order. They are forwarded, together with `outfile`, to
    PipelineBamStats.buildPicardAlignmentStats.
    '''
    alignment_file, reference = infiles

    # Patch for mapping against the transcriptome: switch the genomic
    # reference to the transcriptomic sequences.
    mapped_to_transcriptome = "transcriptome.dir" in alignment_file
    if mapped_to_transcriptome:
        reference = "refcoding.fa"

    PipelineBamStats.buildPicardAlignmentStats(
        alignment_file, outfile, reference)
コード例 #11
0
def loadIdxStats(infiles, outfile):
    '''Merge idxstats files into a single dataframe and load it into the
    database.

    Delegates to PipelineBamStats.loadIdxstats.

    Tables loaded into the database
       * mapped_reads_per_chromosome

    Arguments
    ---------
    infiles : list
        Filenames containing samtools idxstats output, one string per
        element. The expected filename format is 'sample.idxstats'.
    outfile : string
        Logfile. The table name will be derived from `outfile`.
    '''
    load_idxstats = PipelineBamStats.loadIdxstats
    load_idxstats(infiles, outfile)
コード例 #12
0
def loadIdxStats(infiles, outfile):
    '''Combine samtools idxstats outputs and load them into the database.

    The idxstats files are merged into a single dataframe which is then
    loaded; the work is done by PipelineBamStats.loadIdxstats.

    Tables loaded into the database
       * mapped_reads_per_chromosome

    Arguments
    ---------
    infiles : list
        Each element is the name of a file with samtools idxstats
        output, expected to be named like 'sample.idxstats'.
    outfile : string
        Logfile. The table name will be derived from `outfile`.
    '''
    PipelineBamStats.loadIdxstats(
        infiles,
        outfile)
コード例 #13
0
def buildBAMStats(infiles, outfile):
    '''Count number of reads mapped, duplicates, etc. with ``cgat bam2stats``.

    Excludes regions overlapping repetitive RNA sequences (the RNA file
    is passed as ``--mask-bed-file`` together with
    ``--ignore-masked-reads``).

    Parameters
    ----------
    infiles : list
        Two-element sequence, unpacked as ``bamfile, readsfile``.
    infiles[0] : str
       Input filename in :term:`bam` format
    infiles[1] : str
       Input filename with number of reads per sample

    outfile : str
       Output filename with read stats. Also used as the prefix of
       ``--output-filename-pattern``.

    annotations_interface_rna_gff : str
        :term:`PARAMS`. File with repetitive rna, passed to
        ``--mask-bed-file``.
    '''

    rna_file = PARAMS["annotations_interface_rna_gff"]

    # Not referenced again in this function; presumably picked up by
    # P.run() as the memory request for the job -- TODO confirm.
    job_memory = "32G"

    bamfile, readsfile = infiles

    nreads = PipelineBamStats.getNumReadsFromReadsFile(readsfile)
    # The directory part of readsfile is dropped by basename, so the fastq
    # lookups below are relative to the current working directory.
    # P.snip presumably strips the ".nreads" suffix -- verify.
    track = P.snip(os.path.basename(readsfile),
                   ".nreads")

    # if a fastq file exists, submit for counting
    if os.path.exists(track + ".fastq.gz"):
        fastqfile = track + ".fastq.gz"
    elif os.path.exists(track + ".fastq.1.gz"):
        fastqfile = track + ".fastq.1.gz"
    else:
        fastqfile = None

    # An empty option string leaves the command line without --fastq-file.
    if fastqfile is not None:
        fastq_option = "--fastq-file=%s" % fastqfile
    else:
        fastq_option = ""

    # The %(name)s placeholders refer to the local variables defined above;
    # %%s is an escaped literal "%s" that stays in the filename pattern.
    statement = '''
    cgat bam2stats
         %(fastq_option)s
         --force-output
         --mask-bed-file=%(rna_file)s
         --ignore-masked-reads
         --num-reads=%(nreads)i
         --output-filename-pattern=%(outfile)s.%%s
    < %(bamfile)s
    > %(outfile)s
    '''

    # NOTE(review): P.run() is called without arguments; it presumably reads
    # `statement` and the placeholder values from this function's local
    # variables, so the local names must not be renamed -- confirm against
    # the pipeline library.
    P.run()
コード例 #14
0
def buildBAMStats(infiles, outfile):
    '''Count number of reads mapped, duplicates, etc. with ``cgat bam2stats``.

    Excludes regions overlapping repetitive RNA sequences (the RNA file
    is passed as ``--mask-bed-file`` together with
    ``--ignore-masked-reads``).

    Parameters
    ----------
    infiles : list
        Two-element sequence, unpacked as ``bamfile, readsfile``.
    infiles[0] : str
       Input filename in :term:`bam` format
    infiles[1] : str
       Input filename with number of reads per sample

    outfile : str
       Output filename with read stats. Also used as the prefix of
       ``--output-filename-pattern``.

    annotations_interface_rna_gff : str
        :term:`PARAMS`. File with repetitive rna, passed to
        ``--mask-bed-file``.
    '''

    rna_file = PARAMS["annotations_interface_rna_gff"]

    # Not referenced again in this function; presumably picked up by
    # P.run() as the memory request for the job -- TODO confirm.
    job_memory = "32G"

    bamfile, readsfile = infiles

    nreads = PipelineBamStats.getNumReadsFromReadsFile(readsfile)
    # The directory part of readsfile is dropped by basename, so the fastq
    # lookups below are relative to the current working directory.
    # P.snip presumably strips the ".nreads" suffix -- verify.
    track = P.snip(os.path.basename(readsfile), ".nreads")

    # if a fastq file exists, submit for counting
    if os.path.exists(track + ".fastq.gz"):
        fastqfile = track + ".fastq.gz"
    elif os.path.exists(track + ".fastq.1.gz"):
        fastqfile = track + ".fastq.1.gz"
    else:
        fastqfile = None

    # An empty option string leaves the command line without --fastq-file.
    if fastqfile is not None:
        fastq_option = "--fastq-file=%s" % fastqfile
    else:
        fastq_option = ""

    # The %(name)s placeholders refer to the local variables defined above;
    # %%s is an escaped literal "%s" that stays in the filename pattern.
    statement = '''
    cgat bam2stats
         %(fastq_option)s
         --force-output
         --mask-bed-file=%(rna_file)s
         --ignore-masked-reads
         --num-reads=%(nreads)i
         --output-filename-pattern=%(outfile)s.%%s
    < %(bamfile)s
    > %(outfile)s
    '''

    # NOTE(review): P.run() is called without arguments; it presumably reads
    # `statement` and the placeholder values from this function's local
    # variables, so the local names must not be renamed -- confirm against
    # the pipeline library.
    P.run()
コード例 #15
0
def buildPicardDuplicationStats(infile, outfile):
    '''Get duplicate stats from Picard MarkDuplicates.

    Delegates to PipelineBamStats.buildPicardDuplicationStats.
    '''
    build_stats = PipelineBamStats.buildPicardDuplicationStats
    build_stats(infile, outfile)
コード例 #16
0
def loadBAMStats(infiles, outfile):
    '''Load bam statistics into the bam_stats table.

    Delegates to PipelineBamStats.loadBAMStats.
    '''
    load_stats = PipelineBamStats.loadBAMStats
    load_stats(infiles, outfile)
コード例 #17
0
def loadContextStats(infiles, outfile):
    '''Load context mapping statistics into the context_stats table.

    Delegates to PipelineBamStats.loadSummarizedContextStats.
    '''
    load_stats = PipelineBamStats.loadSummarizedContextStats
    load_stats(infiles, outfile)
コード例 #18
0
def loadPicardStats(infiles, outfile):
    '''Merge alignment stats into single tables.

    Delegates to PipelineBamStats.loadPicardAlignmentStats.
    '''
    load_stats = PipelineBamStats.loadPicardAlignmentStats
    load_stats(infiles, outfile)
コード例 #19
0
def loadPicardDuplicationStats(infiles, outfiles):
    '''Load the Picard duplication stats.

    Hands `infiles` and `outfiles` to
    PipelineBamStats.loadPicardDuplicationStats.
    '''
    load_stats = PipelineBamStats.loadPicardDuplicationStats
    load_stats(infiles, outfiles)
コード例 #20
0
def buildContextStats(infiles, outfile):
    '''Build mapping context stats.

    The first two entries of `infiles` and `outfile` are passed to
    PipelineBamStats.summarizeTagsWithinContext.
    '''
    summarize = PipelineBamStats.summarizeTagsWithinContext
    summarize(infiles[0], infiles[1], outfile)
コード例 #21
0
def loadPicardRnaSeqMetrics(infiles, outfiles):
    '''Load the Picard RnaSeqMetrics results.

    Hands `infiles` and `outfiles` to
    PipelineBamStats.loadPicardRnaSeqMetrics.
    '''
    load_metrics = PipelineBamStats.loadPicardRnaSeqMetrics
    load_metrics(infiles, outfiles)
コード例 #22
0
def loadTranscriptProfile(infiles, outfile):
    '''Merge transcript profiles into a single table.

    Delegates to PipelineBamStats.loadTranscriptProfile.
    '''
    load_profile = PipelineBamStats.loadTranscriptProfile
    load_profile(infiles, outfile)
コード例 #23
0
def loadContextStats(infiles, outfile):
    '''Load context mapping statistics into the context_stats table.

    The loading itself is done by
    PipelineBamStats.loadSummarizedContextStats.
    '''
    PipelineBamStats.loadSummarizedContextStats(
        infiles,
        outfile)
コード例 #24
0
def loadPicardStats(infiles, outfile):
    '''Merge alignment stats into single tables.

    The merging itself is done by
    PipelineBamStats.loadPicardAlignmentStats.
    '''
    PipelineBamStats.loadPicardAlignmentStats(
        infiles,
        outfile)
コード例 #25
0
def loadPicardRnaSeqMetrics(infiles, outfiles):
    '''Load the Picard RnaSeqMetrics results.

    `infiles` and `outfiles` are forwarded unchanged to
    PipelineBamStats.loadPicardRnaSeqMetrics.
    '''
    PipelineBamStats.loadPicardRnaSeqMetrics(
        infiles,
        outfiles)
コード例 #26
0
def buildPicardDuplicationStats(infile, outfile):
    '''Get duplicate stats from Picard MarkDuplicates.

    `infile` and `outfile` are forwarded unchanged to
    PipelineBamStats.buildPicardDuplicationStats.
    '''
    PipelineBamStats.buildPicardDuplicationStats(
        infile,
        outfile)
コード例 #27
0
def loadPicardDuplicationStats(infiles, outfiles):
    '''Load the Picard duplication stats.

    `infiles` and `outfiles` are forwarded unchanged to
    PipelineBamStats.loadPicardDuplicationStats.
    '''
    PipelineBamStats.loadPicardDuplicationStats(
        infiles,
        outfiles)
コード例 #28
0
def loadStrandSpecificity(infiles, outfile):
    '''Merge strand specificity data into a single table.

    Delegates to PipelineBamStats.loadStrandSpecificity.
    '''
    load_specificity = PipelineBamStats.loadStrandSpecificity
    load_specificity(infiles, outfile)
コード例 #29
0
def loadTranscriptProfile(infiles, outfile):
    '''Merge transcript profiles into a single table.

    The merging itself is done by PipelineBamStats.loadTranscriptProfile.
    '''
    PipelineBamStats.loadTranscriptProfile(
        infiles,
        outfile)
コード例 #30
0
def loadCountReads(infiles, outfile):
    '''Load read counts into the count_reads table.

    Delegates to PipelineBamStats.loadCountReads.
    '''
    load_counts = PipelineBamStats.loadCountReads
    load_counts(infiles, outfile)
コード例 #31
0
def loadCountReads(infiles, outfile):
    '''Load read counts into the count_reads table.

    The loading itself is done by PipelineBamStats.loadCountReads.
    '''
    PipelineBamStats.loadCountReads(
        infiles,
        outfile)
コード例 #32
0
def buildContextStats(infiles, outfile):
    '''Build mapping context stats.

    PipelineBamStats.summarizeTagsWithinContext receives the first two
    entries of `infiles` followed by `outfile`.
    '''
    PipelineBamStats.summarizeTagsWithinContext(
        infiles[0],
        infiles[1],
        outfile)
コード例 #33
0
def loadStrandSpecificity(infiles, outfile):
    '''Merge strand specificity data into a single table.

    The merging itself is done by PipelineBamStats.loadStrandSpecificity.
    '''
    PipelineBamStats.loadStrandSpecificity(
        infiles,
        outfile)
コード例 #34
0
def loadBAMStats(infiles, outfile):
    '''Load bam statistics into the bam_stats table.

    The loading itself is done by PipelineBamStats.loadBAMStats.
    '''
    PipelineBamStats.loadBAMStats(
        infiles,
        outfile)