def mergeCufflinksGeneFPKM(infiles, outfile):
    '''build aggregate table with cufflinks gene-level FPKM values.'''
    PipelineRnaseq.mergeCufflinksFPKM(
        infiles,
        outfile,
        identifier="gene_id",
        tracking="genes_tracking")
def mergeCufflinksIsoformFPKM(infiles, outfile):
    '''build aggregate table with cufflinks transcript-level FPKM values.'''
    PipelineRnaseq.mergeCufflinksFPKM(
        infiles,
        outfile,
        identifier="transcript_id",
        tracking="fpkm_tracking")
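# A minimal sketch of the aggregation that mergeCufflinksFPKM performs,
# written with pandas for illustration only (the real implementation lives
# in PipelineRnaseq).  It assumes each cufflinks tracking file is
# tab-separated with the identifier column and an "FPKM" column, and that
# the track name can be taken from the containing directory.
import os
import pandas as pd


def merge_fpkm_sketch(infiles, outfile, identifier="gene_id"):
    frames = []
    for infile in infiles:
        track = os.path.basename(os.path.dirname(infile))  # illustrative
        df = pd.read_csv(infile, sep="\t", usecols=[identifier, "FPKM"])
        frames.append(df.set_index(identifier)["FPKM"].rename(track))
    # one row per gene/transcript, one FPKM column per track
    pd.concat(frames, axis=1).to_csv(outfile, sep="\t")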
def quantifyWithStringTie(infiles, outfile):
    '''Quantify existing samples against genesets'''

    bamfile, gtffile = infiles
    outdir = P.snip(outfile, ".log")
    RnaSeq.quantifyWithStringTie(bamfile=bamfile,
                                 gtffile=gtffile,
                                 outdir=outdir)
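# For orientation, a quantification-only StringTie call of the kind
# RnaSeq.quantifyWithStringTie is expected to compose might look like the
# statement below (a sketch, not the pipeline's actual command; the output
# file names are illustrative).
def stringtie_statement_sketch(bamfile, gtffile, outdir, threads=4):
    return (
        "stringtie %(bamfile)s"
        " -e -B"                      # estimate abundances for the given geneset only
        " -G %(gtffile)s"             # reference annotation to quantify against
        " -p %(threads)i"
        " -o %(outdir)s/transcripts.gtf"
        " -A %(outdir)s/gene_abundances.tsv" % locals())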
def count_chunks(infiles, outfile):
    '''count reads over individual GTF features ("chunks") with featureCounts.'''

    bamfile, gtffile = infiles
    PipelineRnaseq.runFeatureCounts(gtffile,
                                    bamfile,
                                    outfile,
                                    job_threads=2,
                                    strand=0,
                                    options=' -f -O -T 2 --primary -p -B -C')
def count_chunks(infiles, outfile):
    '''count reads over individual GTF features ("chunks") with featureCounts.'''

    bamfile, gtffile = infiles
    PipelineRnaseq.runFeatureCounts(
        gtffile,
        bamfile,
        outfile,
        job_threads=PARAMS["featurecounts_threads"],
        strand=PARAMS["stranded"],
        options="-f " + PARAMS["featurecounts_options"])
def runSailfishAddModels(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Sailfish.

    Runs the sailfish "quant" function across transcripts with the specified
    options.  Gene-level read counts are the sum of the counts over all
    transcripts of that gene (based on the getTranscript2GeneMap table).
    '''

    # infiles is nested: ((index, fastqfile), transcript2geneMap)
    infiles, transcript2geneMap = infiles
    index, fastqfile = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.SailfishQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["sailfish_memory"],
        options=PARAMS["sailfish_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['sailfish_libtype'],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
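# The gene-level counts described above are simply transcript-level counts
# summed within each gene.  A sketch of that step (assuming a two-column,
# tab-separated transcript2geneMap of transcript_id -> gene_id and a table
# of per-transcript counts; the real logic is inside the quantifier class):
import pandas as pd


def sum_transcripts_to_genes(transcript_counts_tsv, transcript2gene_tsv):
    counts = pd.read_csv(transcript_counts_tsv, sep="\t", index_col=0)
    t2g = pd.read_csv(transcript2gene_tsv, sep="\t", index_col=0)
    # map each transcript to its gene, then sum counts per gene
    return counts.groupby(t2g.iloc[:, 0]).sum()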
def runKallistoAddModels(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Kallisto.

    Runs the kallisto "quant" function across transcripts with the specified
    options.  Gene-level read counts are the sum of the counts over all
    transcripts of that gene (based on the getTranscript2GeneMap table).
    '''

    # infiles is nested: ((index, fastqfile), transcript2geneMap)
    infiles, transcript2geneMap = infiles
    index, fastqfile = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.KallistoQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
def buildFeatureCounts(infiles, outfile):
    '''counts reads falling into "features", which by default are genes.

    A read is counted as overlapping a feature if at least one bp overlaps.

    Pairs and strandedness can be used to resolve reads falling into
    more than one feature. Reads that cannot be resolved to a single
    feature are ignored.

    '''
    bamfile, annotations = infiles
    PipelineRnaseq.runFeatureCounts(annotations,
                                    bamfile,
                                    outfile,
                                    nthreads=PARAMS['featurecounts_threads'],
                                    strand=PARAMS['featurecounts_strand'],
                                    options=PARAMS['featurecounts_options'])
def buildFeatureCounts(infiles, outfile):
    '''counts reads falling into "features", which by default are genes.

    A read is counted as overlapping a feature if at least one bp overlaps.

    Pairs and strandedness can be used to resolve reads falling into
    more than one feature. Reads that cannot be resolved to a single
    feature are ignored.

    '''
    bamfile, annotations = infiles
    PipelineRnaseq.runFeatureCounts(
        annotations,
        bamfile,
        outfile,
        nthreads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'])
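# The underlying featureCounts invocation is roughly the following (a sketch
# of the command PipelineRnaseq.runFeatureCounts is expected to build, not a
# verbatim copy of it; extra flags from the *_options PARAM are appended).
def featurecounts_statement_sketch(annotations, bamfile, outfile,
                                   nthreads=4, strand=0, options=""):
    return (
        "featureCounts"
        " -a %(annotations)s"   # GTF of features (genes by default)
        " -o %(outfile)s"       # output counts table
        " -T %(nthreads)i"      # number of threads
        " -s %(strand)i"        # 0 unstranded, 1 stranded, 2 reversely stranded
        " %(options)s"
        " %(bamfile)s" % locals())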
def runKallisto(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Kallisto.

    Runs the kallisto "quant" function across transcripts with the specified
    options.  Gene-level read counts are the sum of the counts over all
    transcripts of that gene (based on the getTranscript2GeneMap table).

    Parameters
    ----------
    infiles: list
        list with three components
        0 - string - path to fastq file to quantify using Kallisto
        1 - string - path to Kallisto index file
        2 - string - path to table mapping transcripts to genes

    kallisto_threads: int
       :term:`PARAMS` the number of threads for Kallisto
    kallisto_memory: str
       :term:`PARAMS` the job memory for Kallisto
    kallisto_options: str
       :term:`PARAMS` string appended to the kallisto quant command to
       provide additional options, see
       https://pachterlab.github.io/kallisto/manual
    kallisto_bootstrap: int
       :term:`PARAMS` number of bootstrap samples to run.
       Bootstrapping is required for differential expression with sleuth
       when there are no technical replicates; if you only need point
       estimates, set to 1 (the value must be at least 1).
    kallisto_fragment_length: int
       :term:`PARAMS` fragment length for Kallisto, required for single-end
       reads only
    kallisto_fragment_sd: int
       :term:`PARAMS` fragment length standard deviation for Kallisto,
       required for single-end reads only.
    outfiles: list
       paths to output files for transcripts and genes
    '''

    fastqfile, index, transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.KallistoQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
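# For reference, the kallisto call that a KallistoQuantifier run corresponds
# to looks roughly like this (single-end case shown; a sketch only, the real
# command is assembled inside PipelineRnaseq).
def kallisto_statement_sketch(fastqfile, index, outdir,
                              bootstrap, fragment_length, fragment_sd,
                              threads=1, options=""):
    return (
        "kallisto quant"
        " -i %(index)s"
        " -o %(outdir)s"
        " -b %(bootstrap)i"            # bootstrap samples (needed for sleuth)
        " -t %(threads)i"
        " --single"                    # single-end reads:
        " -l %(fragment_length)i"      #   mean fragment length
        " -s %(fragment_sd)i"          #   fragment length standard deviation
        " %(options)s"
        " %(fastqfile)s" % locals())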
def runSalmon(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Salmon.

    Runs the salmon "quant" function across transcripts with the specified
    options.  Gene-level read counts are the sum of the counts over all
    transcripts of that gene (based on the getTranscript2GeneMap table).

    Parameters
    ----------
    infiles: list
        list with three components
        0 - string - path to fastq file to quantify using Salmon
        1 - string - path to Salmon index file
        2 - string - path to table mapping transcripts to genes

    salmon_threads: int
       :term:`PARAMS` the number of threads for salmon
    salmon_memory: str
       :term:`PARAMS` the job memory for salmon
    salmon_options: str
       :term:`PARAMS` string appended to the salmon quant command to
       provide additional options, see
       http://sailfish.readthedocs.io/en/master/salmon.html
    salmon_bootstrap: int
       :term:`PARAMS` number of bootstrap samples to run.
       Bootstrapping is required for differential expression with sleuth
       when there are no technical replicates; if you only need point
       estimates, set to 1.
    salmon_libtype: str
       :term:`PARAMS` salmon library type (same codes as sailfish), see
       http://sailfish.readthedocs.io/en/master/library_type.html#fraglibtype
    outfiles: list
       paths to output files for transcripts and genes
    '''

    fastqfile, index, transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.SalmonQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["salmon_memory"],
        options=PARAMS["salmon_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['salmon_libtype'],
        kmer=PARAMS['alignment_free_kmer'],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
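# Similarly, a single-end salmon call of the kind SalmonQuantifier wraps
# would look approximately like this (a sketch; paired-end data would use
# -1/-2 instead of -r, and the real command is built inside PipelineRnaseq).
def salmon_statement_sketch(fastqfile, index, outdir,
                            libtype, bootstrap, threads=1, options=""):
    return (
        "salmon quant"
        " -i %(index)s"
        " -l %(libtype)s"              # library type, e.g. "A" for automatic
        " -r %(fastqfile)s"
        " -p %(threads)i"
        " --numBootstraps %(bootstrap)i"
        " -o %(outdir)s"
        " %(options)s" % locals())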
def runFeatureCountsAddModels(infiles, outfiles):
    '''
    First align with hisat2 and then quantify with featureCounts
    '''

    junctions, infile, annotations, sequins_genome_index, transcript_map = infiles

    ### align with hisat ###
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    tmp_outfile = P.getTempFilename()

    hisat_index_dir = os.path.dirname(sequins_genome_index)
    genome = P.snip(os.path.basename(sequins_genome_index), ".1.ht2")

    m = PipelineMapping.Hisat(executable='hisat2',
                              strip_sequence=0,
                              stranded=PARAMS["hisat_strandedness"])

    statement = m.build((infile, ), tmp_outfile)

    # P.run() executes the ``statement`` string assembled above
    P.run()

    ### quantify with featureCounts ###
    transcript_outfile, gene_outfile = outfiles

    Quantifier = PipelineRnaseq.FeatureCountsQuantifier(
        infile=tmp_outfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        job_threads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'],
        annotations=annotations)

    Quantifier.run_all()

    os.unlink(tmp_outfile)
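# The hisat2 step above delegates command construction to
# PipelineMapping.Hisat; for single-end data the resulting alignment command
# is roughly of this shape (a sketch only; sorting and indexing of the
# output are handled by the mapper itself).
def hisat2_statement_sketch(fastqfile, hisat_index_dir, genome, junctions,
                            outfile, threads=4):
    return (
        "hisat2"
        " -x %(hisat_index_dir)s/%(genome)s"
        " -U %(fastqfile)s"
        " --known-splicesite-infile %(junctions)s"
        " -p %(threads)i"
        " | samtools sort -o %(outfile)s -" % locals())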
def loadStringTieQuant(infiles, outfile):
    '''merge StringTie quantification results across samples and load them
    into the database.'''

    # the regular expression extracts the track name from each
    # StringTie output directory
    RnaSeq.mergeAndLoadStringTie(infiles, ".+/(.+)_.+/", outfile)
def loadCufflinks(infile, outfile):
    '''load expression level measurements.'''
    PipelineRnaseq.loadCufflinks(infile, outfile)
def runCufflinks(infiles, outfile):
    '''estimate expression levels in each set using cufflinks.'''
    PipelineRnaseq.runCufflinks(infiles, outfile)