def mergeCufflinksGeneFPKM(infiles, outfile):
    '''Aggregate cufflinks FPKM values into a single gene-level table.

    Thin wrapper around ``PipelineRnaseq.mergeCufflinksFPKM`` that selects
    ``gene_id`` as the identifier and ``genes_tracking`` as the tracking
    setting.

    Parameters
    ----------
    infiles :
        Inputs, passed through unchanged to the merge helper.
    outfile :
        Output target, passed through unchanged to the merge helper.
    '''
    merge_settings = {
        "identifier": "gene_id",
        "tracking": "genes_tracking",
    }
    PipelineRnaseq.mergeCufflinksFPKM(infiles, outfile, **merge_settings)
def mergeCufflinksIsoformFPKM(infiles, outfile):
    '''Aggregate cufflinks FPKM values into a single transcript-level table.

    Thin wrapper around ``PipelineRnaseq.mergeCufflinksFPKM`` that selects
    ``transcript_id`` as the identifier and ``fpkm_tracking`` as the
    tracking setting.

    Parameters
    ----------
    infiles :
        Inputs, passed through unchanged to the merge helper.
    outfile :
        Output target, passed through unchanged to the merge helper.
    '''
    merge_settings = {
        "identifier": "transcript_id",
        "tracking": "fpkm_tracking",
    }
    PipelineRnaseq.mergeCufflinksFPKM(infiles, outfile, **merge_settings)
def quantifyWithStringTie(infiles, outfile):
    '''Quantify an existing sample against a geneset with StringTie.

    Parameters
    ----------
    infiles : sequence
        Exactly two items: the BAM file and the GTF file, in that order.
    outfile : str
        Name of the log file; the output directory handed to StringTie is
        this name with the ``.log`` suffix removed via ``P.snip``.
    '''
    # The output directory is derived from the log file name.
    target_dir = P.snip(outfile, ".log")
    alignments, geneset = infiles

    RnaSeq.quantifyWithStringTie(
        bamfile=alignments,
        gtffile=geneset,
        outdir=target_dir,
    )
def count_chunks(infiles, outfile):
    '''Run featureCounts on one BAM/GTF pair with fixed settings.

    Parameters
    ----------
    infiles : sequence
        ``infiles[0]`` is the BAM file and ``infiles[1]`` is the GTF file.
    outfile :
        Output target, passed through to ``PipelineRnaseq.runFeatureCounts``.
    '''
    alignments = infiles[0]
    annotation = infiles[1]

    # NOTE(review): the thread count appears both as job_threads and inside
    # the option string ("-T 2") - presumably they must be kept in sync.
    fixed_options = ' -f -O -T 2 --primary -p -B -C'

    PipelineRnaseq.runFeatureCounts(
        annotation,
        alignments,
        outfile,
        job_threads=2,
        strand=0,
        options=fixed_options,
    )
def count_chunks(infiles, outfile):
    '''Run featureCounts on one BAM/GTF pair using configured settings.

    Parameters
    ----------
    infiles : sequence
        ``infiles[0]`` is the BAM file and ``infiles[1]`` is the GTF file.
    outfile :
        Output target, passed through to ``PipelineRnaseq.runFeatureCounts``.

    Reads ``featurecounts_threads``, ``stranded`` and
    ``featurecounts_options`` from ``PARAMS``; ``-f `` is always prepended
    to the configured option string.
    '''
    alignments = infiles[0]
    annotation = infiles[1]

    PipelineRnaseq.runFeatureCounts(
        annotation,
        alignments,
        outfile,
        job_threads=PARAMS["featurecounts_threads"],
        strand=PARAMS["stranded"],
        options="-f " + PARAMS["featurecounts_options"],
    )
def runSailfishAddModels(infiles, outfiles):
    '''Quantify transcripts and genes from a fastq file using Sailfish.

    Builds a ``PipelineRnaseq.SailfishQuantifier`` from the inputs and the
    Sailfish / alignment-free settings in ``PARAMS`` and calls its
    ``run_all`` method.

    Parameters
    ----------
    infiles : sequence
        Two items: a pair ``(index, fastqfile)`` followed by the
        transcript-to-gene map.
    outfiles : sequence
        Two items: the transcript-level output and the gene-level output.
    '''
    (index, fastqfile), transcript2geneMap = infiles
    transcript_outfile, gene_outfile = outfiles

    settings = {
        "infile": fastqfile,
        "transcript_outfile": transcript_outfile,
        "gene_outfile": gene_outfile,
        "annotations": index,
        "job_threads": PARAMS["alignment_free_threads"],
        "job_memory": PARAMS["sailfish_memory"],
        "options": PARAMS["sailfish_options"],
        "bootstrap": PARAMS["alignment_free_bootstrap"],
        "libtype": PARAMS['sailfish_libtype'],
        "transcript2geneMap": transcript2geneMap,
    }

    quantifier = PipelineRnaseq.SailfishQuantifier(**settings)
    quantifier.run_all()
def runKallistoAddModels(infiles, outfiles):
    '''Quantify transcripts and genes from a fastq file using Kallisto.

    Builds a ``PipelineRnaseq.KallistoQuantifier`` from the inputs and the
    Kallisto / alignment-free settings in ``PARAMS`` and calls its
    ``run_all`` method.

    Parameters
    ----------
    infiles : sequence
        Two items: a pair ``(index, fastqfile)`` followed by the
        transcript-to-gene map.
    outfiles : sequence
        Two items: the transcript-level output and the gene-level output.
    '''
    (index, fastqfile), transcript2geneMap = infiles
    transcript_outfile, gene_outfile = outfiles

    settings = {
        "infile": fastqfile,
        "transcript_outfile": transcript_outfile,
        "gene_outfile": gene_outfile,
        "annotations": index,
        "job_threads": PARAMS["alignment_free_threads"],
        "job_memory": PARAMS["kallisto_memory"],
        "options": PARAMS["kallisto_options"],
        "bootstrap": PARAMS["alignment_free_bootstrap"],
        "fragment_length": PARAMS["kallisto_fragment_length"],
        "fragment_sd": PARAMS["kallisto_fragment_sd"],
        "transcript2geneMap": transcript2geneMap,
    }

    quantifier = PipelineRnaseq.KallistoQuantifier(**settings)
    quantifier.run_all()
def buildFeatureCounts(infiles, outfile):
    '''Count reads falling into "features" (genes by default).

    A read overlaps a feature if at least one bp overlaps. Pairs and
    strandedness can be used to resolve reads falling into more than one
    feature; reads that cannot be resolved to a single feature are ignored.

    Parameters
    ----------
    infiles : sequence
        Exactly two items: the BAM file and the annotations file.
    outfile :
        Output target, passed through to ``PipelineRnaseq.runFeatureCounts``.
    '''
    alignments, feature_annotations = infiles

    # NOTE(review): other call sites of runFeatureCounts pass job_threads=
    # rather than nthreads= - confirm which keyword the helper accepts.
    run_settings = {
        "nthreads": PARAMS['featurecounts_threads'],
        "strand": PARAMS['featurecounts_strand'],
        "options": PARAMS['featurecounts_options'],
    }

    PipelineRnaseq.runFeatureCounts(
        feature_annotations, alignments, outfile, **run_settings)
def buildFeatureCounts(infiles, outfile):
    '''Count reads falling into "features" (genes by default).

    A read overlaps a feature if at least one bp overlaps. Pairs and
    strandedness can be used to resolve reads falling into more than one
    feature; reads that cannot be resolved to a single feature are ignored.

    Parameters
    ----------
    infiles : sequence
        Exactly two items: the BAM file and the annotations file.
    outfile :
        Output target, passed through to ``PipelineRnaseq.runFeatureCounts``.
    '''
    alignments, feature_annotations = infiles

    # NOTE(review): other call sites of runFeatureCounts pass job_threads=
    # rather than nthreads= - confirm which keyword the helper accepts.
    thread_count = PARAMS['featurecounts_threads']
    strandedness = PARAMS['featurecounts_strand']
    extra_options = PARAMS['featurecounts_options']

    PipelineRnaseq.runFeatureCounts(
        feature_annotations,
        alignments,
        outfile,
        nthreads=thread_count,
        strand=strandedness,
        options=extra_options,
    )
def runKallisto(infiles, outfiles):
    '''Quantify transcripts and genes from a fastq file using Kallisto.

    Runs the kallisto "quant" function across transcripts with the
    specified options. Read counts across genes are counted as the total
    in all transcripts of that gene (based on the getTranscript2GeneMap
    table).

    Parameters
    ----------
    infiles: list
        list with three components
        0 - string - path to fastq file to quantify using Kallisto
        1 - string - path to Kallisto index file
        2 - string - path to table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` the number of threads for Kallisto
    kallisto_memory: str
       :term: `PARAMS` the job memory for Kallisto
    kallisto_options: str
       :term: `PARAMS` string to append to the Kallisto quant command to
       provide specific options, see
       https://pachterlab.github.io/kallisto/manual
    alignment_free_bootstrap: int
       :term: `PARAMS` number of bootstrap samples to run.
       Note, you need to bootstrap for differential expression with sleuth
       if there are no technical replicates. If you only need point
       estimates, set to 1. Note that bootstrap must be set to at least 1
    kallisto_fragment_length: int
       :term: `PARAMS` Fragment length for Kallisto, required for single
       end reads only
    kallisto_fragment_sd: int
       :term: `PARAMS` Fragment length standard deviation for Kallisto,
       required for single end reads only.

    outfiles: list
       paths to output files for transcripts and genes
    '''
    fastqfile, index, transcript2geneMap = infiles
    transcript_outfile, gene_outfile = outfiles

    settings = {
        "infile": fastqfile,
        "transcript_outfile": transcript_outfile,
        "gene_outfile": gene_outfile,
        "annotations": index,
        "job_threads": PARAMS["alignment_free_threads"],
        "job_memory": PARAMS["kallisto_memory"],
        "options": PARAMS["kallisto_options"],
        "bootstrap": PARAMS["alignment_free_bootstrap"],
        "fragment_length": PARAMS["kallisto_fragment_length"],
        "fragment_sd": PARAMS["kallisto_fragment_sd"],
        "transcript2geneMap": transcript2geneMap,
    }

    quantifier = PipelineRnaseq.KallistoQuantifier(**settings)
    quantifier.run_all()
def runSalmon(infiles, outfiles):
    '''Quantify transcripts and genes from fastq input using Salmon.

    Runs the salmon "quant" function across transcripts with the specified
    options. Read counts across genes are counted as the total in all
    transcripts of that gene (based on the getTranscript2GeneMap table).

    Parameters
    ----------
    infiles: list
        list with three components
        0 - fastq input to quantify using salmon
        1 - string - path to salmon index file
        2 - string - path to table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` the number of threads for salmon
    salmon_memory: str
       :term: `PARAMS` the job memory for salmon
    salmon_options: str
       :term: `PARAMS` string to append to the salmon quant command to
       provide specific options, see
       http://sailfish.readthedocs.io/en/master/salmon.html
    alignment_free_bootstrap: int
       :term: `PARAMS` number of bootstrap samples to run.
       Note, you need to bootstrap for differential expression with sleuth
       if there are no technical replicates. If you only need point
       estimates, set to 1.
    salmon_libtype: str
       :term: `PARAMS` salmon library type, see
       http://sailfish.readthedocs.io/en/master/library_type.html#fraglibtype
    alignment_free_kmer:
       :term: `PARAMS` value passed to the quantifier as ``kmer``

    outfiles: list
       paths to output files for transcripts and genes
    '''
    fastqfile, index, transcript2geneMap = infiles
    transcript_outfile, gene_outfile = outfiles

    settings = {
        "infile": fastqfile,
        "transcript_outfile": transcript_outfile,
        "gene_outfile": gene_outfile,
        "annotations": index,
        "job_threads": PARAMS["alignment_free_threads"],
        "job_memory": PARAMS["salmon_memory"],
        "options": PARAMS["salmon_options"],
        "bootstrap": PARAMS["alignment_free_bootstrap"],
        "libtype": PARAMS['salmon_libtype'],
        "kmer": PARAMS['alignment_free_kmer'],
        "transcript2geneMap": transcript2geneMap,
    }

    quantifier = PipelineRnaseq.SalmonQuantifier(**settings)
    quantifier.run_all()
def runFeatureCountsAddModels(infiles, outfiles):
    '''Align reads with hisat2, then quantify the alignments with featureCounts.

    The alignment is written to a temporary file, which is removed again
    once quantification has finished - including when alignment or
    quantification raises.

    Parameters
    ----------
    infiles : sequence
        Exactly five items, in order: junctions, the input file to align,
        the annotations used for quantification, the path to the hisat
        genome index (``<genome>.1.ht2``) and the transcript map.
    outfiles : sequence
        Two items: the transcript-level output and the gene-level output.

    Reads ``hisat_threads``, ``hisat_memory``, ``hisat_strandedness``,
    ``featurecounts_threads``, ``featurecounts_strand`` and
    ``featurecounts_options`` from ``PARAMS``.
    '''
    junctions, infile, annotations, sequins_genome_index, transcript_map = infiles

    # --- align with hisat ---
    # NOTE(review): job_threads, job_memory, hisat_index_dir, genome and
    # statement look unused, but P.run() is called without arguments and
    # presumably picks them up from this function's local scope - do not
    # rename or remove them without checking P.run().
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    tmp_outfile = P.getTempFilename()

    try:
        hisat_index_dir = os.path.dirname(sequins_genome_index)
        genome = P.snip(os.path.basename(sequins_genome_index), ".1.ht2")

        m = PipelineMapping.Hisat(executable='hisat2',
                                  strip_sequence=0,
                                  stranded=PARAMS["hisat_strandedness"])

        statement = m.build((infile, ), tmp_outfile)

        P.run()

        # --- quantify with featureCounts ---
        transcript_outfile, gene_outfile = outfiles

        Quantifier = PipelineRnaseq.FeatureCountsQuantifier(
            infile=tmp_outfile,
            transcript_outfile=transcript_outfile,
            gene_outfile=gene_outfile,
            job_threads=PARAMS['featurecounts_threads'],
            strand=PARAMS['featurecounts_strand'],
            options=PARAMS['featurecounts_options'],
            annotations=annotations)

        Quantifier.run_all()
    finally:
        # Always remove the temporary alignment so a failed run does not
        # leave it behind.
        os.unlink(tmp_outfile)
def loadStringTieQuant(infiles, outfile):
    '''Merge StringTie quantification results and load them.

    Delegates to ``RnaSeq.mergeAndLoadStringTie``, supplying the regular
    expression that is applied to the input paths.

    Parameters
    ----------
    infiles :
        Inputs, passed through unchanged.
    outfile :
        Output target, passed through unchanged.
    '''
    path_pattern = ".+/(.+)_.+/"
    RnaSeq.mergeAndLoadStringTie(infiles, path_pattern, outfile)
def loadCufflinks(infile, outfile):
    '''Load cufflinks expression level measurements.

    Thin wrapper that forwards both arguments unchanged to
    ``PipelineRnaseq.loadCufflinks``.

    Parameters
    ----------
    infile :
        Input handed to the loader.
    outfile :
        Output target handed to the loader.
    '''
    loader = PipelineRnaseq.loadCufflinks
    loader(infile, outfile)
def runCufflinks(infiles, outfile):
    '''Estimate expression levels in each set using cufflinks.

    Thin wrapper that forwards both arguments unchanged to
    ``PipelineRnaseq.runCufflinks``.

    Parameters
    ----------
    infiles :
        Inputs handed to the cufflinks runner.
    outfile :
        Output target handed to the cufflinks runner.
    '''
    runner = PipelineRnaseq.runCufflinks
    runner(infiles, outfile)