Ejemplo n.º 1
0
def buildLncRNAGeneSetStats(infile, outfile):
    '''
    Write summary counts for the gene set in `infile` to `outfile`.

    The output is a tab-separated table holding one header line followed
    by a single row of values (no trailing newline), with the columns:

    no_transcripts              no. of transcripts
    no_genes                    no. genes
    no_exons_per_transcript     average number of exons per transcript
    no_exons_per_gene           average number of exons per gene
    no_single_exon_transcripts  no. single exon transcripts
    no_multi_exon_transcripts   no. multi-exon transcripts
    no_single_exon_genes        no. single exon genes
    no_multi_exon_genes         no. multi-exon genes

    Each value is whatever ``count()`` returns for the corresponding
    ``PipelineLncRNA.Counter*`` object built from `infile`, converted
    with ``str``.

    infile  -- gene set file handed unchanged to every counter
    outfile -- path of the plain-text table to create (overwritten)
    '''
    header = [
        "no_transcripts", "no_genes", "no_exons_per_transcript",
        "no_exons_per_gene", "no_single_exon_transcripts",
        "no_multi_exon_transcripts", "no_single_exon_genes",
        "no_multi_exon_genes"
    ]

    # The context manager guarantees the handle is flushed and closed,
    # even if one of the counters raises part-way through.
    with open(outfile, "w") as outf:
        outf.write("\t".join(header) + "\n")

        # Order must match `header` above.
        counts = [
            PipelineLncRNA.CounterTranscripts(infile).count(),
            PipelineLncRNA.CounterGenes(infile).count(),
            PipelineLncRNA.CounterExonsPerTranscript(infile).count(),
            PipelineLncRNA.CounterExonsPerGene(infile).count(),
            PipelineLncRNA.CounterSingleExonTranscripts(infile).count(),
            PipelineLncRNA.CounterMultiExonTranscripts(infile).count(),
            PipelineLncRNA.CounterSingleExonGenes(infile).count(),
            PipelineLncRNA.CounterMultiExonGenes(infile).count()
        ]
        outf.write("\t".join(map(str, counts)))
Ejemplo n.º 2
0
    def __call__(self, track, slice=None):
        '''Return the single- and multi-exon counts for `track`.

        Both counters are constructed from the same file, whose path is
        ``os.path.join("gtfs", track) + ".gtf.gz"``.  The result is an
        ``odict`` with the keys ``single_exon`` and ``multi_exon`` (in
        that order) mapping to the respective ``count()`` values.

        `slice` is accepted but not used here.
        '''
        gtf_path = os.path.join("gtfs", track) + ".gtf.gz"

        n_single = PipelineLncRNA.CounterSingleExonGenes(gtf_path).count()
        n_multi = PipelineLncRNA.CounterMultiExonGenes(gtf_path).count()

        return odict([("single_exon", n_single), ("multi_exon", n_multi)])
Ejemplo n.º 3
0
def buildRefcodingGeneSetStats(infile, outfile):
    '''
    Write summary counts for the reference coding gene set to `outfile`.

    `infile` is first passed through ``PipelineLncRNA.flagExonStatus``
    into a temporary ``.gz`` file; all counters then read that temporary
    file rather than `infile` itself.

    The output is a tab-separated table holding one header line followed
    by a single row of values (no trailing newline), with the columns:

    no_transcripts              no. of transcripts
    no_genes                    no. genes
    no_exons_per_transcript     average number of exons per transcript
    no_exons_per_gene           average number of exons per gene
    no_single_exon_transcripts  no. single exon transcripts
    no_multi_exon_transcripts   no. multi-exon transcripts
    no_single_exon_genes        no. single exon genes
    no_multi_exon_genes         no. multi-exon genes

    infile  -- gene set file handed to ``flagExonStatus``
    outfile -- path of the table to create, opened via ``IOTools.openFile``
    '''

    # calculate exon status for refcoding genes.
    tmpf = P.getTempFilename(".") + ".gz"

    try:
        PipelineLncRNA.flagExonStatus(infile, tmpf)

        outf = IOTools.openFile(outfile, "w")
        try:
            outf.write("\t".join([
                "no_transcripts", "no_genes", "no_exons_per_transcript",
                "no_exons_per_gene", "no_single_exon_transcripts",
                "no_multi_exon_transcripts", "no_single_exon_genes",
                "no_multi_exon_genes"
            ]) + "\n")
            # Order must match the header written above.
            outf.write("\t".join(
                map(str, [
                    PipelineLncRNA.CounterTranscripts(tmpf).count(),
                    PipelineLncRNA.CounterGenes(tmpf).count(),
                    PipelineLncRNA.CounterExonsPerTranscript(tmpf).count(),
                    PipelineLncRNA.CounterExonsPerGene(tmpf).count(),
                    PipelineLncRNA.CounterSingleExonTranscripts(tmpf).count(),
                    PipelineLncRNA.CounterMultiExonTranscripts(tmpf).count(),
                    PipelineLncRNA.CounterSingleExonGenes(tmpf).count(),
                    PipelineLncRNA.CounterMultiExonGenes(tmpf).count()
                ])))
        finally:
            # Explicit close so the table is flushed to disk before the
            # function returns, whether or not a counter raised.
            outf.close()
    finally:
        # Remove the temporary artefacts on success and on failure alike.
        # The three paths are the ones the original code removed: the
        # flagged .gz file, its ".log" companion and the un-suffixed
        # temporary file name.  After a failure some of them may not have
        # been created, so missing files are skipped instead of raising
        # and hiding the real error.
        for path in (tmpf, tmpf + ".log", P.snip(tmpf, ".gz")):
            if os.path.exists(path):
                os.unlink(path)