def strandSpecificity(infile, outfile):
    '''Determine the strand specificity of a library from its bam file.

    The work is delegated to ``PipelineBamStats.getStrandSpecificity``,
    which is given a fixed iteration count of ``"1000000"`` (passed as a
    string, not an integer).

    Parameters
    ----------
    infile
        Input file handed to the helper; per the original notes this is
        the library's :term:`bam` file.
    outfile
        Output file handed to the helper unchanged.
    '''
    PipelineBamStats.getStrandSpecificity(infile, outfile, "1000000")
def processGenomicContext(infile, outfile):
    '''Process the genomic context file.

    According to the original notes, every feature of the context file
    is assigned to a specific category, which helps in understanding the
    hierarchical classification of features. The actual processing is
    done by ``PipelineBamStats.defineBedFeatures``.

    Parameters
    ----------
    infile
        Genomic context file, passed through to the helper.
    outfile
        Output file, passed through to the helper.
    '''
    PipelineBamStats.defineBedFeatures(
        infile,
        outfile,
    )
def intBam(infile, outfile):
    '''Create the intermediate bam file used by downstream tasks.

    When ``PARAMS["bam_sequence_stripped"]`` is exactly ``True`` the
    file is produced by ``PipelineBamStats.addPseudoSequenceQuality``;
    in every other case ``PipelineBamStats.copyBamFile`` is used.

    The original notes give the motivation: Picard tools has an issue
    when quality score information is missing.

    Parameters
    ----------
    infile
        Input bam file.
    outfile
        Intermediate bam file to produce.
    '''
    # Identity comparison on purpose: only the boolean True selects the
    # pseudo-quality path, other truthy values do not.
    sequence_stripped = PARAMS["bam_sequence_stripped"] is True
    make_intermediate = (
        PipelineBamStats.addPseudoSequenceQuality
        if sequence_stripped
        else PipelineBamStats.copyBamFile
    )
    make_intermediate(infile, outfile)
def buildPicardStats(infiles, outfile):
    '''Build Picard alignment stats.

    Parameters
    ----------
    infiles
        Two-element sequence: the alignment file followed by the
        reference sequence file.
    outfile
        Output file passed to
        ``PipelineBamStats.buildPicardAlignmentStats``.
    '''
    alignment, genome_reference = infiles

    # Patch for mapping against the transcriptome: swap the genomic
    # reference for the transcriptomic sequences.
    reference = (
        "refcoding.fa"
        if "transcriptome.dir" in alignment
        else genome_reference
    )

    PipelineBamStats.buildPicardAlignmentStats(alignment, outfile, reference)
def buildPicardRnaSeqMetrics(infiles, outfile):
    '''Collect Picard RnaSeqMetrics for the given input.

    The configured ``PARAMS["strandness"]`` value is translated into the
    strand label passed to ``PipelineBamStats.buildPicardRnaSeqMetrics``:

    * ``"RF"`` or ``"R"`` -> ``"SECOND_READ_TRANSCRIPTION_STRAND"``
    * ``"FR"`` or ``"F"`` -> ``"FIRST_READ_TRANSCRIPTION_STRAND"``
    * anything else       -> ``"NONE"``

    Parameters
    ----------
    infiles
        Input(s) passed through unchanged to the helper.
    outfile
        Output file passed through to the helper.
    '''
    strandness = PARAMS["strandness"]

    # Membership tests are required here: the previous form
    # ``== ("RF" or "R")`` collapsed to ``== "RF"``, so the single-end
    # codes "R" and "F" were silently treated as unstranded.
    if strandness in ("RF", "R"):
        strand = "SECOND_READ_TRANSCRIPTION_STRAND"
    elif strandness in ("FR", "F"):
        strand = "FIRST_READ_TRANSCRIPTION_STRAND"
    else:
        strand = "NONE"

    PipelineBamStats.buildPicardRnaSeqMetrics(infiles, strand, outfile)
def loadIdxStats(infiles, outfile):
    '''Merge idxstats files into one dataframe and load it to the database.

    Tables loaded (per the original notes):

    * mapped_reads_per_chromosome

    The merge and load are carried out by
    ``PipelineBamStats.loadIdxstats``.

    Arguments
    ---------
    infiles : list
        Filenames of samtools idxstats output, one string per element.
        The expected filename format is 'sample.idxstats'.
    outfile : string
        Logfile. The table name will be derived from `outfile`.
    '''
    PipelineBamStats.loadIdxstats(
        infiles,
        outfile,
    )
def buildBAMStats(infiles, outfile):
    '''Count the number of reads mapped, duplicates, etc.

    Runs ``cgat bam2stats`` on the bam file, masking the regions listed
    in the repetitive RNA annotation file and ignoring masked reads.

    Parameters
    ----------
    infiles : list
        infiles[0] : str
            Input filename in :term:`bam` format
        infiles[1] : str
            Input filename with number of reads per sample
    outfile : str
        Output filename with read stats; also used as the prefix of the
        ``--output-filename-pattern`` option.
    annotations_interface_rna_gff : str
        :term:`PARAMS`. Annotation file with repetitive RNA (the
        original notes describe it as :term:`gtf` format), passed as
        ``--mask-bed-file``.
    '''
    # NOTE(review): P.run() is called without arguments, so it presumably
    # picks up `statement`, `job_memory` and the names interpolated into
    # the statement from this function's local scope - keep those local
    # names unchanged. Confirm against the P module.
    job_memory = "32G"
    rna_file = PARAMS["annotations_interface_rna_gff"]

    bamfile, readsfile = infiles
    nreads = PipelineBamStats.getNumReadsFromReadsFile(readsfile)
    track = P.snip(os.path.basename(readsfile), ".nreads")

    # If a fastq file exists for this track, submit it for counting.
    # The single-file name is preferred over the first-of-pair name.
    fastqfile = next(
        (track + suffix
         for suffix in (".fastq.gz", ".fastq.1.gz")
         if os.path.exists(track + suffix)),
        None,
    )
    fastq_option = (
        "" if fastqfile is None else "--fastq-file=%s" % fastqfile
    )

    statement = ''' cgat bam2stats %(fastq_option)s --force-output --mask-bed-file=%(rna_file)s --ignore-masked-reads --num-reads=%(nreads)i --output-filename-pattern=%(outfile)s.%%s < %(bamfile)s > %(outfile)s '''
    P.run()
def buildPicardDuplicationStats(infile, outfile):
    '''Collect duplication stats with Picard MarkDuplicates.

    Thin wrapper around
    ``PipelineBamStats.buildPicardDuplicationStats``; both arguments are
    passed through unchanged.

    Parameters
    ----------
    infile
        Input file handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.buildPicardDuplicationStats(
        infile,
        outfile,
    )
def loadBAMStats(infiles, outfile):
    '''Load bam statistics into the bam_stats table.

    Thin wrapper around ``PipelineBamStats.loadBAMStats``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadBAMStats(
        infiles,
        outfile,
    )
def loadContextStats(infiles, outfile):
    '''Load context mapping statistics into the context_stats table.

    Thin wrapper around
    ``PipelineBamStats.loadSummarizedContextStats``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadSummarizedContextStats(
        infiles,
        outfile,
    )
def loadPicardStats(infiles, outfile):
    '''Merge Picard alignment stats into single tables.

    Thin wrapper around ``PipelineBamStats.loadPicardAlignmentStats``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadPicardAlignmentStats(
        infiles,
        outfile,
    )
def loadPicardDuplicationStats(infiles, outfiles):
    '''Merge Picard duplication stats into single tables.

    Thin wrapper around
    ``PipelineBamStats.loadPicardDuplicationStats``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfiles
        Output target(s) handed to the helper unchanged.
    '''
    PipelineBamStats.loadPicardDuplicationStats(
        infiles,
        outfiles,
    )
def buildContextStats(infiles, outfile):
    '''Build mapping context stats.

    Passes the first two entries of `infiles`, followed by `outfile`,
    to ``PipelineBamStats.summarizeTagsWithinContext``.

    Parameters
    ----------
    infiles
        Indexable collection; only elements 0 and 1 are used.
    outfile
        Output file handed to the helper.
    '''
    # NOTE(review): a second function with this same name is defined
    # later in this file; at module level the later definition rebinds
    # the name.
    first_input = infiles[0]
    second_input = infiles[1]
    PipelineBamStats.summarizeTagsWithinContext(
        first_input, second_input, outfile)
def loadPicardRnaSeqMetrics(infiles, outfiles):
    '''Merge Picard RnaSeqMetrics output into single tables.

    Thin wrapper around ``PipelineBamStats.loadPicardRnaSeqMetrics``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfiles
        Output target(s) handed to the helper unchanged.
    '''
    PipelineBamStats.loadPicardRnaSeqMetrics(
        infiles,
        outfiles,
    )
def loadTranscriptProfile(infiles, outfile):
    '''Merge transcript profiles into a single table.

    Thin wrapper around ``PipelineBamStats.loadTranscriptProfile``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadTranscriptProfile(
        infiles,
        outfile,
    )
def loadStrandSpecificity(infiles, outfile):
    '''Merge strand specificity data into a single table.

    Thin wrapper around ``PipelineBamStats.loadStrandSpecificity``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadStrandSpecificity(
        infiles,
        outfile,
    )
def loadCountReads(infiles, outfile):
    '''Load read counts into the count_reads table.

    Thin wrapper around ``PipelineBamStats.loadCountReads``.

    Parameters
    ----------
    infiles
        Input files handed to the helper.
    outfile
        Output file handed to the helper.
    '''
    PipelineBamStats.loadCountReads(
        infiles,
        outfile,
    )
def buildContextStats(infiles, outfile):
    '''Build mapping context stats.

    Passes the first two entries of `infiles`, followed by `outfile`,
    to ``PipelineBamStats.summarizeTagsWithinContext``.

    Parameters
    ----------
    infiles
        Indexable collection; only elements 0 and 1 are used.
    outfile
        Output file handed to the helper.
    '''
    # NOTE(review): an earlier function with this same name is defined
    # in this file; this later definition rebinds the module-level name.
    first_input = infiles[0]
    second_input = infiles[1]
    PipelineBamStats.summarizeTagsWithinContext(
        first_input, second_input, outfile)