Python PipelineGtfsubset Examples

Programming Language: Python

Namespace/Package Name: CGATPipelines

Examples at hotexamples.com: 20

Python PipelineGtfsubset - 20 examples found. These are the top rated real world Python examples of CGATPipelines.PipelineGtfsubset extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SubsetGTF(4)

buildFlatGeneSet(2)

buildGenomicContext(2)

getRepeatDataFromUCSC(2)

loadGeneInformation(2)

SubsetGFF3(1)

connectToUCSC(1)

Example #1

Show file

def importRNAAnnotationFromUCSC(outfile):
    """Download the repetitive RNA types from UCSC into ``outfile``.

    The repeat classes come from ``PARAMS["ucsc_rnatypes"]`` and the
    contig-removal pattern from ``PARAMS["ncbi_remove_contigs"]``. The
    work itself is delegated to ``PipelineGtfsubset.getRepeatDataFromUCSC``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    rna_classes = P.asList(PARAMS["ucsc_rnatypes"])
    contig_pattern = PARAMS["ncbi_remove_contigs"]

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=rna_classes,
                  outfile=outfile,
                  remove_contigs_regex=contig_pattern)

Example #2

Show file

def importRepeatsFromUCSC(outfile):
    """Download the UCSC repeat types named in the configuration.

    The repeat classes are taken from ``PARAMS["ucsc_repeattypes"]`` and
    the result is written to ``outfile`` by
    ``PipelineGtfsubset.getRepeatDataFromUCSC``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    options = dict(
        dbhandle=connectToUCSC(),
        repclasses=P.asList(PARAMS["ucsc_repeattypes"]),
        outfile=outfile,
    )
    fetch_repeats(**options)

Example #3

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def importRNAAnnotationFromUCSC(outfile):
    """Download the repetitive RNA types from UCSC into ``outfile``.

    Configuration read from ``PARAMS``: ``ucsc_rnatypes`` (repeat
    classes), ``ncbi_remove_contigs`` (contig-removal pattern) and
    ``job_memory``. The call is forwarded to
    ``PipelineGtfsubset.getRepeatDataFromUCSC``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    options = dict(
        dbhandle=connectToUCSC(),
        repclasses=P.asList(PARAMS["ucsc_rnatypes"]),
        outfile=outfile,
        remove_contigs_regex=PARAMS["ncbi_remove_contigs"],
        job_memory=PARAMS["job_memory"],
    )
    fetch_repeats(**options)

Example #4

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def importRepeatsFromUCSC(outfile):
    """Download the UCSC repeat types named in the configuration.

    Repeat classes are read from ``PARAMS["ucsc_repeattypes"]`` and the
    memory request from ``PARAMS["job_memory"]``; the download is
    performed by ``PipelineGtfsubset.getRepeatDataFromUCSC``.
    """
    fetch_repeats = PipelineGtfsubset.getRepeatDataFromUCSC
    ucsc_connection = connectToUCSC()
    repeat_classes = P.asList(PARAMS["ucsc_repeattypes"])
    memory = PARAMS["job_memory"]

    fetch_repeats(dbhandle=ucsc_connection,
                  repclasses=repeat_classes,
                  outfile=outfile,
                  job_memory=memory)

Example #5

Show file

def buildNonCodingExonTranscript(infile, outfile):
    '''
    Write the non-coding exon features of an ENSEMBL gene set.

    Filters a :term:`gtf` file on two columns using the ``"and not"``
    operator with the items ``exon`` and ``protein_coding``. The column
    names are read from ``PARAMS['ensembl_cgat_feature']`` and
    ``PARAMS['ensembl_cgat_gene_biotype']``.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    subsetter = PipelineGtfsubset.SubsetGTF(infile)

    feature_column = PARAMS['ensembl_cgat_feature']
    biotype_column = PARAMS['ensembl_cgat_gene_biotype']
    columns = [feature_column, biotype_column]
    wanted = ["exon", "protein_coding"]

    subsetter.filterGTF(outfile, columns, wanted, operators="and not")

Example #6

Show file

def buildCdsTranscript(infile, outfile):
    '''
    Write the CDS features of an ENSEMBL gene set.

    Filters a :term:`gtf` file on the column named by
    ``PARAMS['ensembl_cgat_feature']``, keeping the item ``CDS``.

    Note - no filtering on gene_biotype is applied because some of the
    CDS are classified as polymorphic_pseudogene.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    subsetter = PipelineGtfsubset.SubsetGTF(infile)
    feature_column = PARAMS['ensembl_cgat_feature']

    # Single-column filter, so no combining operator is supplied.
    subsetter.filterGTF(outfile, feature_column, ["CDS"], operators=None)

Example #7

Show file

def buildLincRNAExonTranscript(infile, outfile):
    '''
    Write the lincRNA exon features of an ENSEMBL gene set.

    Filters a :term:`gtf` file on two columns using the ``"and"``
    operator with the items ``exon`` and ``lincRNA``. The column names
    are read from ``PARAMS['ensembl_cgat_feature']`` and
    ``PARAMS['ensembl_cgat_gene_biotype']``.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    '''
    subsetter = PipelineGtfsubset.SubsetGTF(infile)

    feature_column = PARAMS['ensembl_cgat_feature']
    biotype_column = PARAMS['ensembl_cgat_gene_biotype']
    columns = [feature_column, biotype_column]
    wanted = ["exon", "lincRNA"]

    subsetter.filterGTF(outfile, columns, wanted, operators="and")

Example #8

Show file

def buildmiRNonPrimaryTranscript(infile, outfile):
    '''
    Subset a miRbase annotation GFF3 file to its "miRNA" entries.

    The GFF3 file can be downloaded from miRbase; make sure the
    annotation matches the genome build that you are using.

    Only annotations labelled "miRNA" are selected, which yields all of
    the non primary transcripts. The column to filter on is read from
    ``PARAMS['ensembl_cgat_feature']``.
    '''
    subsetter = PipelineGtfsubset.SubsetGFF3(infile)
    feature_column = PARAMS['ensembl_cgat_feature']

    subsetter.filterGFF3(outfile, feature_column, ["miRNA"])

Example #9

Show file

def buildExonTranscript(infile, outfile):
    '''
    Write the exon features of an ENSEMBL gene set.

    Filters a :term:`gtf` file on the column named by
    ``PARAMS['ensembl_cgat_feature']``, keeping the item ``exon``.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.ini
    outfile : from ruffus
       Output filename named in pipeline.ini
    '''
    subsetter = PipelineGtfsubset.SubsetGTF(infile)
    feature_column = PARAMS['ensembl_cgat_feature']

    # Single-column filter, so no combining operator is supplied.
    subsetter.filterGTF(outfile, feature_column, ["exon"], operators=None)

Example #10

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def buildGenomicContext(infiles, outfile):
    """Forward to ``PipelineGtfsubset.buildGenomicContext``.

    The memory request is taken from ``PARAMS["job_highmemory"]``.
    """
    build = PipelineGtfsubset.buildGenomicContext
    memory = PARAMS["job_highmemory"]
    build(infiles, outfile, job_memory=memory)

Example #11

Show file

def buildGenomicContext(infiles, outfile):
    """Forward ``infiles`` and ``outfile`` to
    ``PipelineGtfsubset.buildGenomicContext``."""
    build = PipelineGtfsubset.buildGenomicContext
    build(infiles, outfile)

Example #12

Show file

def buildGenomicContext(infiles, outfile):
    """Forward to ``PipelineGtfsubset.buildGenomicContext`` with the
    ``job_highmemory`` setting from ``PARAMS`` as ``job_memory``."""
    build = PipelineGtfsubset.buildGenomicContext
    extra = {"job_memory": PARAMS["job_highmemory"]}
    build(infiles, outfile, **extra)

Example #13

Show file

def connectToUCSC():
    """Return the result of ``PipelineGtfsubset.connectToUCSC``.

    Host, user and database are read from the ``ucsc_host``,
    ``ucsc_user`` and ``ucsc_database`` entries of ``PARAMS``.
    """
    connect = PipelineGtfsubset.connectToUCSC
    settings = {
        "host": PARAMS["ucsc_host"],
        "user": PARAMS["ucsc_user"],
        "database": PARAMS["ucsc_database"],
    }
    return connect(**settings)

Example #14

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def connectToUCSC():
    """Call ``PipelineGtfsubset.connectToUCSC`` with the configured
    UCSC host, user and database and return its result."""
    connect = PipelineGtfsubset.connectToUCSC
    ucsc_host = PARAMS["ucsc_host"]
    ucsc_user = PARAMS["ucsc_user"]
    ucsc_database = PARAMS["ucsc_database"]
    return connect(host=ucsc_host, user=ucsc_user, database=ucsc_database)

Example #15

Show file

def loadGeneInformation(infile, outfile):
    '''Load the transcript set.

    Forwards to ``PipelineGtfsubset.loadGeneInformation`` with
    ``PARAMS["job_highmemory"]`` as the memory request.
    '''
    load = PipelineGtfsubset.loadGeneInformation
    memory = PARAMS["job_highmemory"]
    load(infile, outfile, job_memory=memory)

Example #16

Show file

def buildFlatGeneSet(infile, outfile):
    """Forward ``infile`` and ``outfile`` to
    ``PipelineGtfsubset.buildFlatGeneSet``."""
    flatten = PipelineGtfsubset.buildFlatGeneSet
    flatten(infile, outfile)

Example #17

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def buildFlatGeneSet(infile, outfile):
    """Forward to ``PipelineGtfsubset.buildFlatGeneSet``.

    The memory request is taken from ``PARAMS["job_highmemory"]``.
    """
    flatten = PipelineGtfsubset.buildFlatGeneSet
    memory = PARAMS["job_highmemory"]
    flatten(infile, outfile, job_memory=memory)

Example #18

Show file

def loadGeneInformation(infile, outfile):
    '''Load the transcript set by forwarding to
    ``PipelineGtfsubset.loadGeneInformation``.'''
    load = PipelineGtfsubset.loadGeneInformation
    load(infile, outfile)

Example #19

Show file

def buildFlatGeneSet(infile, outfile):
    """Forward to ``PipelineGtfsubset.buildFlatGeneSet`` with the
    ``job_highmemory`` setting from ``PARAMS`` as ``job_memory``."""
    flatten = PipelineGtfsubset.buildFlatGeneSet
    extra = {"job_memory": PARAMS["job_highmemory"]}
    flatten(infile, outfile, **extra)

Example #20

Show file

File: pipeline_genesets.py Project: CGATOxford/CGATPipelines

def loadGeneInformation(infile, outfile):
    '''Load the transcript set.

    The call is passed on to ``PipelineGtfsubset.loadGeneInformation``
    with the ``job_highmemory`` setting from ``PARAMS`` as ``job_memory``.
    '''
    load = PipelineGtfsubset.loadGeneInformation
    extra = {"job_memory": PARAMS["job_highmemory"]}
    load(infile, outfile, **extra)