Ejemplo n.º 1
0
def importRNAAnnotationFromUCSC(outfile):
    """This task downloads UCSC repetetive RNA types.
    """
    PipelineGtfsubset.getRepeatDataFromUCSC(
        dbhandle=connectToUCSC(),
        repclasses=P.asList(PARAMS["ucsc_rnatypes"]),
        outfile=outfile,
        remove_contigs_regex=PARAMS["ncbi_remove_contigs"])
Ejemplo n.º 2
0
def importRepeatsFromUCSC(outfile):
    """This task downloads UCSC repeats types as identified
    in the configuration file.
    """
    PipelineGtfsubset.getRepeatDataFromUCSC(dbhandle=connectToUCSC(),
                                            repclasses=P.asList(
                                                PARAMS["ucsc_repeattypes"]),
                                            outfile=outfile)
Ejemplo n.º 3
0
def importRNAAnnotationFromUCSC(outfile):
    """This task downloads UCSC repetetive RNA types.
    """
    PipelineGtfsubset.getRepeatDataFromUCSC(
        dbhandle=connectToUCSC(),
        repclasses=P.asList(PARAMS["ucsc_rnatypes"]),
        outfile=outfile,
        remove_contigs_regex=PARAMS["ncbi_remove_contigs"],
        job_memory=PARAMS["job_memory"])
Ejemplo n.º 4
0
def importRepeatsFromUCSC(outfile):
    """This task downloads UCSC repeats types as identified
    in the configuration file.
    """
    PipelineGtfsubset.getRepeatDataFromUCSC(
        dbhandle=connectToUCSC(),
        repclasses=P.asList(PARAMS["ucsc_repeattypes"]),
        outfile=outfile,
        job_memory=PARAMS["job_memory"])
Ejemplo n.º 5
0
def buildNonCodingExonTranscript(infile, outfile):
    '''
    Output of the non-coding exon features from an ENSEMBL gene set

    Remove all of the features from a :term:`gtf` file
    that are features of ``exon`` and are protein-coding

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    filteroption : string
       Filter option set in the piepline.yml as feature column in GTF
       nomenclature
    '''
    m = PipelineGtfsubset.SubsetGTF(infile)

    filteroptions = [
        PARAMS['ensembl_cgat_feature'], PARAMS['ensembl_cgat_gene_biotype']
    ]
    filteritem = ["exon", "protein_coding"]

    m.filterGTF(outfile, filteroptions, filteritem, operators="and not")
Ejemplo n.º 6
0
def buildCdsTranscript(infile, outfile):
    '''
    Output the CDS features from an ENSEMBL gene set

    takes all of the features from a :term:`gtf` file
    that are feature types of ``CDS``.

    Note - we have not filtered on gene_biotype because some of the CDS
    are classified as polymorphic_pseudogene.

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    filteroption : string
       Filter option set in the piepline.yml as feature column in GTF
       nomenclature
    '''

    m = PipelineGtfsubset.SubsetGTF(infile)

    filteroption = PARAMS['ensembl_cgat_feature']
    filteritem = ["CDS"]

    m.filterGTF(outfile, filteroption, filteritem, operators=None)
Ejemplo n.º 7
0
def buildLincRNAExonTranscript(infile, outfile):
    '''
    Output of the lincRNA features from an ENSEMBL gene set

    Takes all of the features from a :term:`gtf` file
    that are features of ``lincRNA``

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.yml
    outfile : from ruffus
       Output filename named in pipeline.yml
    filteroption : string
       Filter option set in the piepline.yml as feature column in GTF
       nomenclature
    '''
    m = PipelineGtfsubset.SubsetGTF(infile)

    filteroptions = [
        PARAMS['ensembl_cgat_feature'], PARAMS['ensembl_cgat_gene_biotype']
    ]

    filteritem = ["exon", "lincRNA"]

    m.filterGTF(outfile, filteroptions, filteritem, operators="and")
Ejemplo n.º 8
0
def buildmiRNonPrimaryTranscript(infile, outfile):
    '''
    This function will subset a miRbase annotation gff3 file.The GFF3
    file can be downloaded from miRbase. Make sure the annotation matches
    the genome build that you are using.

    This function will subset the GFF3 file by selecting annotations that are
    labled "miRNA". This will subset all of the non primary transcripts.
    '''

    m = PipelineGtfsubset.SubsetGFF3(infile)

    filteroption = PARAMS['ensembl_cgat_feature']
    filteritem = ["miRNA"]

    m.filterGFF3(outfile, filteroption, filteritem)
Ejemplo n.º 9
0
def buildExonTranscript(infile, outfile):
    '''
    Output of the exon features from an ENSEMBL gene set

    Takes all of the features from a :term:`gtf` file
    that are features of ``exon``

    Arguments
    ---------
    infile : from ruffus
       ENSEMBL geneset, filename named in pipeline.ini
    outfile : from ruffus
       Output filename named in pipeline.ini
    filteroption : string
       Filter option set in the piepline.ini as feature column in GTF
       nomenclature
    '''
    m = PipelineGtfsubset.SubsetGTF(infile)

    filteroption = PARAMS['ensembl_cgat_feature']
    filteritem = ["exon"]

    m.filterGTF(outfile, filteroption, filteritem, operators=None)
Ejemplo n.º 10
0
def buildGenomicContext(infiles, outfile):
    PipelineGtfsubset.buildGenomicContext(infiles, outfile,
                                          job_memory=PARAMS["job_highmemory"])
Ejemplo n.º 11
0
def buildGenomicContext(infiles, outfile):
    PipelineGtfsubset.buildGenomicContext(infiles, outfile)
Ejemplo n.º 12
0
def buildGenomicContext(infiles, outfile):
    PipelineGtfsubset.buildGenomicContext(infiles,
                                          outfile,
                                          job_memory=PARAMS["job_highmemory"])
Ejemplo n.º 13
0
def connectToUCSC():
    return PipelineGtfsubset.connectToUCSC(host=PARAMS["ucsc_host"],
                                           user=PARAMS["ucsc_user"],
                                           database=PARAMS["ucsc_database"])
Ejemplo n.º 14
0
def connectToUCSC():
    return PipelineGtfsubset.connectToUCSC(
        host=PARAMS["ucsc_host"],
        user=PARAMS["ucsc_user"],
        database=PARAMS["ucsc_database"])
Ejemplo n.º 15
0
def loadGeneInformation(infile, outfile):
    '''load the transcript set.'''
    PipelineGtfsubset.loadGeneInformation(infile,
                                          outfile,
                                          job_memory=PARAMS["job_highmemory"])
Ejemplo n.º 16
0
def buildFlatGeneSet(infile, outfile):
    PipelineGtfsubset.buildFlatGeneSet(infile, outfile)
Ejemplo n.º 17
0
def buildFlatGeneSet(infile, outfile):
    PipelineGtfsubset.buildFlatGeneSet(infile, outfile,
                                       job_memory=PARAMS["job_highmemory"])
Ejemplo n.º 18
0
def loadGeneInformation(infile, outfile):
    '''load the transcript set.'''
    PipelineGtfsubset.loadGeneInformation(infile, outfile)
Ejemplo n.º 19
0
def buildFlatGeneSet(infile, outfile):
    PipelineGtfsubset.buildFlatGeneSet(infile,
                                       outfile,
                                       job_memory=PARAMS["job_highmemory"])
Ejemplo n.º 20
0
def loadGeneInformation(infile, outfile):
    '''load the transcript set.'''
    PipelineGtfsubset.loadGeneInformation(infile, outfile,
                                          job_memory=PARAMS["job_highmemory"])