Python PipelineRnaseq Examples, CGATPipelines.PipelineRnaseq Python Examples

Example #1

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: Charlie-George/cgat

def mergeCufflinksGeneFPKM(infiles, outfile):
    '''Build the aggregate table of cufflinks FPKM values keyed by gene.

    Thin wrapper: forwards ``infiles`` and ``outfile`` unchanged to
    ``PipelineRnaseq.mergeCufflinksFPKM`` together with
    ``identifier="gene_id"`` and ``tracking="genes_tracking"``.
    '''
    # Settings that distinguish this wrapper from other FPKM merges.
    gene_level = {
        "identifier": "gene_id",
        "tracking": "genes_tracking",
    }
    PipelineRnaseq.mergeCufflinksFPKM(infiles, outfile, **gene_level)

Example #2

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: Charlie-George/cgat

def mergeCufflinksIsoformFPKM(infiles, outfile):
    '''Build the aggregate table of cufflinks FPKM values keyed by transcript.

    Thin wrapper: forwards ``infiles`` and ``outfile`` unchanged to
    ``PipelineRnaseq.mergeCufflinksFPKM`` together with
    ``identifier="transcript_id"`` and ``tracking="fpkm_tracking"``.
    '''
    # Settings that distinguish this wrapper from other FPKM merges.
    isoform_level = {
        "identifier": "transcript_id",
        "tracking": "fpkm_tracking",
    }
    PipelineRnaseq.mergeCufflinksFPKM(infiles, outfile, **isoform_level)

Example #3

0

Show file

File: pipeline_utrons.py Project: CristinaAlexandru/pipeline_utrons

def quantifyWithStringTie(infiles, outfile):
    '''Quantify an existing sample against a geneset with StringTie.

    ``infiles`` is unpacked as ``(bamfile, gtffile)``.  The output
    directory handed to ``RnaSeq.quantifyWithStringTie`` is derived from
    ``outfile`` via ``P.snip(outfile, ".log")``.
    '''
    alignments, geneset = infiles
    target_dir = P.snip(outfile, ".log")

    RnaSeq.quantifyWithStringTie(
        bamfile=alignments,
        gtffile=geneset,
        outdir=target_dir)

Example #4

0

Show file

def count_chunks(infiles, outfile):
    '''Run featureCounts on one BAM/GTF pair with hard-coded settings.

    ``infiles[0]`` is used as the BAM file and ``infiles[1]`` as the GTF
    file.  The call to ``PipelineRnaseq.runFeatureCounts`` always uses
    ``job_threads=2``, ``strand=0`` and a fixed option string.
    '''
    bamfile, gtffile = infiles[0], infiles[1]

    # The option string is passed through verbatim.
    fixed_options = ' -f -O -T 2 --primary -p -B -C'

    PipelineRnaseq.runFeatureCounts(
        gtffile,
        bamfile,
        outfile,
        job_threads=2,
        strand=0,
        options=fixed_options)

Example #5

0

Show file

File: pipeline_retained_introns.py Project: sudlab/pipeline_retained_introns

def count_chunks(infiles, outfile):
    '''Run featureCounts on one BAM/GTF pair using configured settings.

    ``infiles[0]`` is used as the BAM file and ``infiles[1]`` as the GTF
    file.  Thread count and strandedness come from
    ``PARAMS["featurecounts_threads"]`` and ``PARAMS["stranded"]``; the
    option string is ``"-f "`` followed by
    ``PARAMS["featurecounts_options"]``.
    '''
    bamfile, gtffile = infiles[0], infiles[1]

    # Configuration is read in the same order as the keyword arguments.
    threads = PARAMS["featurecounts_threads"]
    strandedness = PARAMS["stranded"]
    fc_options = "-f " + PARAMS["featurecounts_options"]

    PipelineRnaseq.runFeatureCounts(
        gtffile,
        bamfile,
        outfile,
        job_threads=threads,
        strand=strandedness,
        options=fc_options)

Example #6

0

Show file

File: pipeline_ref_transcriptome_paper.py Project: TomSmithCGAT/RefTranscriptome

def runSailfishAddModels(infiles, outfiles):
    '''
    Quantify a fastq file against an indexed transcriptome with Sailfish.

    Builds a ``PipelineRnaseq.SailfishQuantifier`` and calls its
    ``run_all`` method.

    Parameters
    ----------
    infiles: sequence
        two components
        0 - pair ``(index, fastqfile)``
        1 - table mapping transcripts to genes
    outfiles: sequence
        ``(transcript_outfile, gene_outfile)``

    Threads, memory, options, bootstrap count and library type are read
    from ``PARAMS`` (``alignment_free_threads``, ``sailfish_memory``,
    ``sailfish_options``, ``alignment_free_bootstrap``,
    ``sailfish_libtype``).
    '''

    # Nested unpacking instead of rebinding the ``infiles`` argument.
    (transcriptome_index, reads), tx2gene = infiles
    transcript_table, gene_table = outfiles

    PipelineRnaseq.SailfishQuantifier(
        infile=reads,
        transcript_outfile=transcript_table,
        gene_outfile=gene_table,
        annotations=transcriptome_index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["sailfish_memory"],
        options=PARAMS["sailfish_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['sailfish_libtype'],
        transcript2geneMap=tx2gene).run_all()

Example #7

0

Show file

File: pipeline_ref_transcriptome_paper.py Project: TomSmithCGAT/RefTranscriptome

def runKallistoAddModels(infiles, outfiles):
    '''
    Quantify a fastq file against an indexed transcriptome with Kallisto.

    Builds a ``PipelineRnaseq.KallistoQuantifier`` and calls its
    ``run_all`` method.

    Parameters
    ----------
    infiles: sequence
        two components
        0 - pair ``(index, fastqfile)``
        1 - table mapping transcripts to genes
    outfiles: sequence
        ``(transcript_outfile, gene_outfile)``

    Threads, memory, options, bootstrap count, fragment length and
    fragment length standard deviation are read from ``PARAMS``
    (``alignment_free_threads``, ``kallisto_memory``,
    ``kallisto_options``, ``alignment_free_bootstrap``,
    ``kallisto_fragment_length``, ``kallisto_fragment_sd``).
    '''

    # Nested unpacking instead of rebinding the ``infiles`` argument.
    (transcriptome_index, reads), tx2gene = infiles
    transcript_table, gene_table = outfiles

    PipelineRnaseq.KallistoQuantifier(
        infile=reads,
        transcript_outfile=transcript_table,
        gene_outfile=gene_table,
        annotations=transcriptome_index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=tx2gene).run_all()

Example #8

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: lesheng/cgat

def buildFeatureCounts(infiles, outfile):
    '''Count reads falling into "features" (genes by default).

    A read counts as overlapping when at least one bp overlaps.  Pairing
    and strandedness can resolve reads that hit more than one feature;
    reads that still cannot be assigned to a single feature are ignored.

    ``infiles`` is unpacked as ``(bamfile, annotations)``.  Thread count,
    strand setting and extra options are taken from
    ``PARAMS['featurecounts_threads']``, ``PARAMS['featurecounts_strand']``
    and ``PARAMS['featurecounts_options']``.
    '''
    alignments, geneset = infiles

    # Configuration is read in the same order as the keyword arguments.
    threads = PARAMS['featurecounts_threads']
    strand_mode = PARAMS['featurecounts_strand']
    extra_options = PARAMS['featurecounts_options']

    PipelineRnaseq.runFeatureCounts(
        geneset,
        alignments,
        outfile,
        nthreads=threads,
        strand=strand_mode,
        options=extra_options)

Example #9

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: jmadzo/cgat

def buildFeatureCounts(infiles, outfile):
    '''Count reads falling into "features" (genes by default).

    A read counts as overlapping when at least one bp overlaps.  Pairing
    and strandedness can resolve reads that hit more than one feature;
    reads that still cannot be assigned to a single feature are ignored.

    ``infiles`` is unpacked as ``(bamfile, annotations)``.  Thread count,
    strand setting and extra options are taken from
    ``PARAMS['featurecounts_threads']``, ``PARAMS['featurecounts_strand']``
    and ``PARAMS['featurecounts_options']``.
    '''
    alignments, geneset = infiles

    # Keyword settings, built in the original evaluation order.
    fc_settings = dict(
        nthreads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'])

    PipelineRnaseq.runFeatureCounts(geneset, alignments, outfile,
                                    **fc_settings)

Example #10

0

Show file

File: pipeline_ref_transcriptome_paper.py Project: TomSmithCGAT/RefTranscriptome

def runKallisto(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Kallisto.

    Builds a ``PipelineRnaseq.KallistoQuantifier`` from the inputs and
    the configuration below, then calls its ``run_all`` method.

    Parameters
    ----------
    infiles: list
        list with three components
        0 - string - path to fastq file to quantify using Kallisto
        1 - string - path to Kallisto index file
        2 - string - path to table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` passed to the quantifier as ``job_threads``
    kallisto_memory: str
       :term: `PARAMS` passed to the quantifier as ``job_memory``
    kallisto_options: str
       :term: `PARAMS` passed to the quantifier as ``options``,
       see https://pachterlab.github.io/kallisto/manual
    alignment_free_bootstrap: int
       :term: `PARAMS` passed to the quantifier as ``bootstrap``
    kallisto_fragment_length: int
       :term: `PARAMS` Fragment length for Kallisto, required for single end
       reads only
    kallisto_fragment_sd: int
       :term: `PARAMS` Fragment length standard deviation for Kallisto,
       required for single end reads only.
    outfiles: list
       paths to output files for transcripts and genes
    '''

    reads, kallisto_index, tx2gene = infiles
    transcript_table, gene_table = outfiles

    PipelineRnaseq.KallistoQuantifier(
        infile=reads,
        transcript_outfile=transcript_table,
        gene_outfile=gene_table,
        annotations=kallisto_index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=tx2gene).run_all()

Example #11

0

Show file

File: pipeline_ref_transcriptome_paper.py Project: TomSmithCGAT/RefTranscriptome

def runSalmon(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Salmon.

    Builds a ``PipelineRnaseq.SalmonQuantifier`` from the inputs and the
    configuration below, then calls its ``run_all`` method.

    Parameters
    ----------
    infiles: list
        list with three components
        0 - fastq input, passed to the quantifier as ``infile``
        1 - index, passed to the quantifier as ``annotations``
        2 - table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` passed to the quantifier as ``job_threads``
    salmon_memory: str
       :term: `PARAMS` passed to the quantifier as ``job_memory``
    salmon_options: str
       :term: `PARAMS` passed to the quantifier as ``options``,
       see http://sailfish.readthedocs.io/en/master/salmon.html
    alignment_free_bootstrap: int
       :term: `PARAMS` passed to the quantifier as ``bootstrap``
    salmon_libtype: str
       :term: `PARAMS` salmon library type, see
       http://sailfish.readthedocs.io/en/master/library_type.html#fraglibtype
    alignment_free_kmer: int
       :term: `PARAMS` passed to the quantifier as ``kmer``
    outfiles: list
       paths to output files for transcripts and genes
    '''

    reads, salmon_index, tx2gene = infiles
    transcript_table, gene_table = outfiles

    PipelineRnaseq.SalmonQuantifier(
        infile=reads,
        transcript_outfile=transcript_table,
        gene_outfile=gene_table,
        annotations=salmon_index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["salmon_memory"],
        options=PARAMS["salmon_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['salmon_libtype'],
        kmer=PARAMS['alignment_free_kmer'],
        transcript2geneMap=tx2gene).run_all()

Example #12

0

Show file

File: pipeline_ref_transcriptome_paper.py Project: TomSmithCGAT/RefTranscriptome

def runFeatureCountsAddModels(infiles, outfiles):
    '''
    First align with hisat2 and then quantify with FeatureCounts.

    Parameters
    ----------
    infiles: sequence
        five components, unpacked in this order: junctions, infile,
        annotations, sequins_genome_index, transcript_map.  Only infile,
        annotations and sequins_genome_index are used in this function.
    outfiles: sequence
        ``(transcript_outfile, gene_outfile)``, handed to the
        FeatureCounts quantifier.

    The alignment is written to a temporary file that is removed at the
    end of the function.
    '''

    # junctions and transcript_map are unpacked but not referenced below.
    junctions, infile, annotations, sequins_genome_index, transcript_map = infiles

    ### align with hisat ###
    # NOTE(review): job_threads, job_memory, hisat_index_dir, genome and
    # statement are assigned here but never referenced again by name in
    # this function.  Presumably the bare P.run() call (and the command
    # built by m.build) picks them up from this function's local
    # variables -- confirm before renaming or removing any of them.
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    # Alignment output goes to a temporary file; it is the input of the
    # quantification step and is deleted at the end.
    tmp_outfile = P.getTempFilename()

    # Split the index path into its directory and a base name obtained
    # via P.snip(..., ".1.ht2").
    hisat_index_dir = os.path.dirname(sequins_genome_index)
    genome = P.snip(os.path.basename(sequins_genome_index), ".1.ht2")

    m = PipelineMapping.Hisat(executable='hisat2',
                              strip_sequence=0,
                              stranded=PARAMS["hisat_strandedness"])

    # The mapper receives the single input file wrapped in a 1-tuple.
    statement = m.build((infile, ), tmp_outfile)

    P.run()

    ### quantify with featureCounts ###
    transcript_outfile, gene_outfile = outfiles

    Quantifier = PipelineRnaseq.FeatureCountsQuantifier(
        infile=tmp_outfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        job_threads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'],
        annotations=annotations)

    Quantifier.run_all()

    # NOTE(review): not reached if any step above raises, so the
    # temporary alignment file is left behind on failure.
    os.unlink(tmp_outfile)

Example #13

0

Show file

File: pipeline_utrons.py Project: CristinaAlexandru/pipeline_utrons

def loadStringTieQuant(infiles, outfile):
    '''Merge and load StringTie quantification results.

    Forwards ``infiles``, a fixed regular expression and ``outfile`` to
    ``RnaSeq.mergeAndLoadStringTie``.
    '''
    # NOTE(review): presumably this pattern extracts a per-sample name
    # from each input path -- confirm against mergeAndLoadStringTie.
    path_pattern = ".+/(.+)_.+/"

    RnaSeq.mergeAndLoadStringTie(infiles, path_pattern, outfile)

Example #14

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: lesheng/cgat

def loadCufflinks(infile, outfile):
    '''Load cufflinks expression level measurements.

    Thin wrapper: hands ``infile`` and ``outfile`` unchanged to
    ``PipelineRnaseq.loadCufflinks``.
    '''
    loader = PipelineRnaseq.loadCufflinks
    loader(infile, outfile)

Example #15

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: jmadzo/cgat

def runCufflinks(infiles, outfile):
    '''Estimate expression levels in each set using cufflinks.

    Thin wrapper: hands ``infiles`` and ``outfile`` unchanged to
    ``PipelineRnaseq.runCufflinks``.
    '''
    estimator = PipelineRnaseq.runCufflinks
    estimator(infiles, outfile)

Example #16

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: jmadzo/cgat

def loadCufflinks(infile, outfile):
    '''Load cufflinks expression level measurements.

    Delegates directly to ``PipelineRnaseq.loadCufflinks`` with the same
    ``infile`` and ``outfile``.
    '''
    load_args = (infile, outfile)
    PipelineRnaseq.loadCufflinks(*load_args)

Example #17

0

Show file

File: pipeline_rnaseqdiffexpression.py Project: lesheng/cgat

def runCufflinks(infiles, outfile):
    '''Estimate expression levels in each set using cufflinks.

    Delegates directly to ``PipelineRnaseq.runCufflinks`` with the same
    ``infiles`` and ``outfile``.
    '''
    run_args = (infiles, outfile)
    PipelineRnaseq.runCufflinks(*run_args)