def mergeCufflinksGeneFPKM(infiles, outfile):
    '''build aggregate table with cufflinks gene-level FPKM values.'''
    PipelineRnaseq.mergeCufflinksFPKM(
        infiles,
        outfile,
        identifier="gene_id",
        tracking="genes_tracking")
def mergeCufflinksIsoformFPKM(infiles, outfile):
    '''build aggregate table with cufflinks transcript-level (isoform) FPKM values.'''
    PipelineRnaseq.mergeCufflinksFPKM(
        infiles,
        outfile,
        identifier="transcript_id",
        tracking="fpkm_tracking")
def quantifyWithStringTie(infiles, outfile):
    '''Quantify existing samples against genesets'''

    bamfile, gtffile = infiles
    outdir = P.snip(outfile, ".log")
    RnaSeq.quantifyWithStringTie(bamfile=bamfile,
                                 gtffile=gtffile,
                                 outdir=outdir)
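
RnaSeq.quantifyWithStringTie wraps the StringTie call; a hedged sketch of the underlying command, assuming quantification-only mode (-e) with ballgown output (-B) against the reference GTF, and stringtie_quant_sketch as a hypothetical helper, could be:

import subprocess

def stringtie_quant_sketch(bamfile, gtffile, outdir, threads=4):
    # Illustrative only: estimate abundances for the reference
    # transcripts (-e) and emit ballgown tables (-B).
    subprocess.check_call([
        "stringtie", bamfile,
        "-e", "-B",
        "-G", gtffile,
        "-p", str(threads),
        "-o", f"{outdir}/transcripts.gtf",
        "-A", f"{outdir}/gene_abundances.tsv",
    ])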
Example #4
def count_chunks(infiles, outfile):
    '''count reads overlapping individual features (chunks) with
    featureCounts.'''

    gtffile = infiles[1]
    bamfile = infiles[0]

    # featureCounts options: -f count at feature level, -O assign reads
    # to all overlapping features, -T threads, --primary use primary
    # alignments only, -p count fragments (paired-end), -B require both
    # ends mapped, -C exclude chimeric fragments
    PipelineRnaseq.runFeatureCounts(gtffile,
                                    bamfile,
                                    outfile,
                                    job_threads=2,
                                    strand=0,
                                    options=' -f -O -T 2 --primary -p -B -C')
def count_chunks(infiles, outfile):
    '''count reads overlapping individual features (chunks) with
    featureCounts, taking thread, strand and option settings from the
    pipeline configuration.'''

    gtffile = infiles[1]
    bamfile = infiles[0]

    PipelineRnaseq.runFeatureCounts(
        gtffile,
        bamfile,
        outfile,
        job_threads=PARAMS["featurecounts_threads"],
        strand=PARAMS["stranded"],
        options="-f " + PARAMS["featurecounts_options"])
def runSailfishAddModels(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Sailfish.

    Runs the sailfish "quant" function across transcripts with the specified
    options.  Read counts for a gene are computed as the sum over all
    transcripts of that gene (based on the getTranscript2GeneMap table)
    '''

    # infiles is nested: ((index, fastqfile), transcript2geneMap)
    (index, fastqfile), transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.SailfishQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["sailfish_memory"],
        options=PARAMS["sailfish_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['sailfish_libtype'],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
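
The gene-level counts described above are simply the transcript counts summed within each gene; a minimal sketch of that aggregation, assuming a two-column transcript-to-gene map and a transcript count table with columns transcript_id and count (both column names are assumptions), might be:

import pandas as pd

def sum_transcripts_to_genes(transcript_counts_tsv, transcript2gene_tsv,
                             outfile):
    # Illustrative only: join transcript-level counts to their gene ids
    # and sum counts within each gene.
    counts = pd.read_csv(transcript_counts_tsv, sep="\t")
    t2g = pd.read_csv(transcript2gene_tsv, sep="\t")
    merged = counts.merge(t2g, on="transcript_id")
    gene_counts = merged.groupby("gene_id", as_index=False)["count"].sum()
    gene_counts.to_csv(outfile, sep="\t", index=False)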
def runKallistoAddModels(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Kallisto.

    Runs the kallisto "quant" function across transcripts with the specified
    options.  Read counts for a gene are computed as the sum over all
    transcripts of that gene (based on the getTranscript2GeneMap table)
    '''

    # infiles is nested: ((index, fastqfile), transcript2geneMap)
    (index, fastqfile), transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.KallistoQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
Example #8
def buildFeatureCounts(infiles, outfile):
    '''counts reads falling into "features", which by default are genes.

    A read overlaps if at least one bp overlaps.

    Pairs and strandedness can be used to resolve reads falling into
    more than one feature. Reads that cannot be resolved to a single
    feature are ignored.

    '''
    bamfile, annotations = infiles
    PipelineRnaseq.runFeatureCounts(annotations,
                                    bamfile,
                                    outfile,
                                    nthreads=PARAMS['featurecounts_threads'],
                                    strand=PARAMS['featurecounts_strand'],
                                    options=PARAMS['featurecounts_options'])
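
PipelineRnaseq.runFeatureCounts assembles a featureCounts command; a hedged sketch of the equivalent call, assuming gene-level (meta-feature) counting against a GTF and featurecounts_sketch as a hypothetical helper, could be:

import subprocess

def featurecounts_sketch(annotations_gtf, bamfile, outfile,
                         threads=4, strand=0):
    # Illustrative only: -a annotation, -o output table, -T threads,
    # -s strandedness (0 unstranded, 1 stranded, 2 reverse-stranded).
    subprocess.check_call([
        "featureCounts",
        "-a", annotations_gtf,
        "-o", outfile,
        "-T", str(threads),
        "-s", str(strand),
        bamfile,
    ])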
Example #9
def buildFeatureCounts(infiles, outfile):
    '''counts reads falling into "features", which by default are genes.

    A read overlaps if at least one bp overlaps.

    Pairs and strandedness can be used to resolve reads falling into
    more than one feature. Reads that cannot be resolved to a single
    feature are ignored.

    '''
    bamfile, annotations = infiles
    PipelineRnaseq.runFeatureCounts(
        annotations,
        bamfile,
        outfile,
        nthreads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'])
def runKallisto(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Kallisto.

    Runs the kallisto "quant" function across transcripts with the specified
    options.  Read counts for a gene are computed as the sum over all
    transcripts of that gene (based on the getTranscript2GeneMap table)

    Parameters
    ----------
    infiles: list
        list with three components
        0 - string - path to fastq file to quantify using Kallisto
        1 - string - path to Kallisto index file
        2 - string - path to table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` the number of threads for Kallisto
    kallisto_memory: str
       :term: `PARAMS` the job memory for Kallisto
    kallisto_options: str
       :term: `PARAMS` string to append to the Kallisto quant command to
       provide specific
       options, see https://pachterlab.github.io/kallisto/manual
    alignment_free_bootstrap: int
       :term: `PARAMS` number of bootstrap samples to run.
       Note, you need to bootstrap for differential expression with sleuth
       if there are no technical replicates. If you only need point estimates,
       set to 1.  Note that bootstrap must be set to at least 1
    kallisto_fragment_length: int
       :term: `PARAMS` Fragment length for Kallisto, required for single end
       reads only
    kallisto_fragment_sd: int
       :term: `PARAMS` Fragment length standard deviation for Kallisto,
       required for single end reads only.
    outfiles: list
       paths to output files for transcripts and genes
    '''

    fastqfile, index, transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.KallistoQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["kallisto_memory"],
        options=PARAMS["kallisto_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        fragment_length=PARAMS["kallisto_fragment_length"],
        fragment_sd=PARAMS["kallisto_fragment_sd"],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
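
Behind the KallistoQuantifier, the core step is a kallisto quant call; a minimal sketch for single-end reads, assuming the fragment length/SD parameters described in the docstring above and kallisto_quant_sketch as a hypothetical helper, might be:

import subprocess

def kallisto_quant_sketch(index, fastqfile, outdir, bootstrap=1,
                          fragment_length=200, fragment_sd=20, threads=4):
    # Illustrative only: single-end quantification; -l/-s are required
    # for single-end data, --bootstrap-samples feeds sleuth.
    subprocess.check_call([
        "kallisto", "quant",
        "-i", index,
        "-o", outdir,
        "-t", str(threads),
        "--bootstrap-samples", str(bootstrap),
        "--single",
        "-l", str(fragment_length),
        "-s", str(fragment_sd),
        fastqfile,
    ])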
def runSalmon(infiles, outfiles):
    '''
    Computes read counts across transcripts and genes based on a fastq
    file and an indexed transcriptome using Salmon.

    Runs the salmon "quant" function across transcripts with the specified
    options.  Read counts for a gene are computed as the sum over all
    transcripts of that gene (based on the getTranscript2GeneMap table)

    Parameters
    ----------
    infiles: list
        list with three components
        0 - list of strings - paths to fastq files to merge then quantify
        across using salmon
        1 - string - path to salmon index file
        2 - string - path to table mapping transcripts to genes

    alignment_free_threads: int
       :term: `PARAMS` the number of threads for salmon
    salmon_memory: str
       :term: `PARAMS` the job memory for salmon
    salmon_options: str
       :term: `PARAMS` string to append to the salmon quant command to
       provide specific
       options, see http://sailfish.readthedocs.io/en/master/salmon.html
    alignment_free_bootstrap: int
       :term: `PARAMS` number of bootstrap samples to run.
       Note, you need to bootstrap for differential expression with sleuth
       if there are no technical replicates. If you only need point estimates,
       set to 1.
    salmon_libtype: str
       :term: `PARAMS` salmon library type
       as for sailfish - use
       http://sailfish.readthedocs.io/en/master/library_type.html#fraglibtype
    outfiles: list
       paths to output files for transcripts and genes
    '''

    fastqfile, index, transcript2geneMap = infiles

    transcript_outfile, gene_outfile = outfiles
    Quantifier = PipelineRnaseq.SalmonQuantifier(
        infile=fastqfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        annotations=index,
        job_threads=PARAMS["alignment_free_threads"],
        job_memory=PARAMS["salmon_memory"],
        options=PARAMS["salmon_options"],
        bootstrap=PARAMS["alignment_free_bootstrap"],
        libtype=PARAMS['salmon_libtype'],
        kmer=PARAMS['alignment_free_kmer'],
        transcript2geneMap=transcript2geneMap)

    Quantifier.run_all()
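
The SalmonQuantifier ultimately issues a salmon quant command; a hedged sketch for single-end reads, assuming automatic library-type detection (-l A) and salmon_quant_sketch as a hypothetical helper, could be:

import subprocess

def salmon_quant_sketch(index, fastqfile, outdir, libtype="A",
                        bootstrap=1, threads=4):
    # Illustrative only: single-end quantification against a salmon
    # index; -l A lets salmon infer the library type automatically.
    subprocess.check_call([
        "salmon", "quant",
        "-i", index,
        "-l", libtype,
        "-r", fastqfile,
        "-o", outdir,
        "-p", str(threads),
        "--numBootstraps", str(bootstrap),
    ])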
def runFeatureCountsAddModels(infiles, outfiles):
    '''
    First align with hisat2 and then quantify with featureCounts.
    '''

    junctions, infile, annotations, sequins_genome_index, transcript_map = infiles

    ### align with hisat ###
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    tmp_outfile = P.getTempFilename()

    hisat_index_dir = os.path.dirname(sequins_genome_index)
    genome = P.snip(os.path.basename(sequins_genome_index), ".1.ht2")

    m = PipelineMapping.Hisat(executable='hisat2',
                              strip_sequence=0,
                              stranded=PARAMS["hisat_strandedness"])

    statement = m.build((infile, ), tmp_outfile)

    # P.run() executes the ``statement`` it finds in the local namespace
    P.run()

    ### quantify with featureCounts ###
    transcript_outfile, gene_outfile = outfiles

    Quantifier = PipelineRnaseq.FeatureCountsQuantifier(
        infile=tmp_outfile,
        transcript_outfile=transcript_outfile,
        gene_outfile=gene_outfile,
        job_threads=PARAMS['featurecounts_threads'],
        strand=PARAMS['featurecounts_strand'],
        options=PARAMS['featurecounts_options'],
        annotations=annotations)

    Quantifier.run_all()

    os.unlink(tmp_outfile)
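
The alignment step delegated to PipelineMapping.Hisat boils down to a hisat2 call piped into samtools; a hedged sketch of that command, assuming single-end fastq input, a sorted BAM output and hisat2_align_sketch as a hypothetical helper, could be:

import subprocess

def hisat2_align_sketch(index_prefix, fastqfile, outbam, threads=4,
                        known_splicesites=None):
    # Illustrative only: align reads with hisat2 and sort to BAM.
    cmd = ["hisat2", "-x", index_prefix, "-U", fastqfile, "-p", str(threads)]
    if known_splicesites:
        cmd += ["--known-splicesite-infile", known_splicesites]
    hisat = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    subprocess.check_call(["samtools", "sort", "-o", outbam, "-"],
                          stdin=hisat.stdout)
    hisat.stdout.close()
    if hisat.wait() != 0:
        raise RuntimeError("hisat2 failed")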
def loadStringTieQuant(infiles, outfile):
    '''merge the per-sample StringTie quantification tables and load
    them into the database, deriving the track name from the directory
    path via the regular expression.'''

    RnaSeq.mergeAndLoadStringTie(infiles, ".+/(.+)_.+/", outfile)
Example #14
def loadCufflinks(infile, outfile):
    '''load expression level measurements.'''
    PipelineRnaseq.loadCufflinks(infile, outfile)
Example #15
def runCufflinks(infiles, outfile):
    '''estimate expression levels in each set using cufflinks.'''
    PipelineRnaseq.runCufflinks(infiles, outfile)
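
PipelineRnaseq.runCufflinks is a wrapper around the cufflinks executable; a minimal sketch of a quantification-only run against a reference GTF, assuming default options and cufflinks_quant_sketch as a hypothetical helper, might be:

import subprocess

def cufflinks_quant_sketch(bamfile, gtffile, outdir, threads=4):
    # Illustrative only: quantify against a reference annotation (-G)
    # without assembling novel transcripts; FPKM tables are written to
    # outdir (genes.fpkm_tracking, isoforms.fpkm_tracking).
    subprocess.check_call([
        "cufflinks",
        "-G", gtffile,
        "-o", outdir,
        "-p", str(threads),
        bamfile,
    ])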