def buildLncRNAGeneSetStats(infile, outfile):
    '''Write summary counts for the gene set in `infile` to `outfile`.

    The output is a two-line, tab-separated table: a header line followed
    by one line of values (no trailing newline after the values). The
    columns, in the order written, are:

    no_transcripts
    no_genes
    no_exons_per_transcript
    no_exons_per_gene
    no_single_exon_transcripts
    no_multi_exon_transcripts
    no_single_exon_genes
    no_multi_exon_genes

    Each value is produced by the matching ``PipelineLncRNA.Counter*``
    class, instantiated on `infile` and queried through ``count()``.

    Arguments
    ---------
    infile : string
        Gene set passed unchanged to every counter.
        NOTE(review): presumably a GTF file carrying exon-status
        information - confirm against PipelineLncRNA.
    outfile : string
        Path of the table to write; an existing file is overwritten.
    '''
    header = [
        "no_transcripts",
        "no_genes",
        "no_exons_per_transcript",
        "no_exons_per_gene",
        "no_single_exon_transcripts",
        "no_multi_exon_transcripts",
        "no_single_exon_genes",
        "no_multi_exon_genes",
    ]

    # Same order as `header`: one counter class per output column.
    counters = [
        PipelineLncRNA.CounterTranscripts,
        PipelineLncRNA.CounterGenes,
        PipelineLncRNA.CounterExonsPerTranscript,
        PipelineLncRNA.CounterExonsPerGene,
        PipelineLncRNA.CounterSingleExonTranscripts,
        PipelineLncRNA.CounterMultiExonTranscripts,
        PipelineLncRNA.CounterSingleExonGenes,
        PipelineLncRNA.CounterMultiExonGenes,
    ]

    # Compute everything before touching the output so that a failing
    # counter does not leave a header-only file behind.
    values = [str(counter(infile).count()) for counter in counters]

    # The context manager guarantees the handle is flushed and closed.
    with open(outfile, "w") as outf:
        outf.write("\t".join(header) + "\n")
        outf.write("\t".join(values))
def buildRefcodingGeneSetStats(infile, outfile):
    '''Write summary counts for the reference coding gene set to `outfile`.

    `infile` is first passed through ``PipelineLncRNA.flagExonStatus``,
    which writes to a temporary ``.gz`` file in the current directory;
    all counts are then taken from that temporary file.

    The output is a two-line, tab-separated table: a header line followed
    by one line of values (no trailing newline after the values). The
    columns, in the order written, are:

    no_transcripts
    no_genes
    no_exons_per_transcript
    no_exons_per_gene
    no_single_exon_transcripts
    no_multi_exon_transcripts
    no_single_exon_genes
    no_multi_exon_genes

    Each value is produced by the matching ``PipelineLncRNA.Counter*``
    class, instantiated on the temporary file and queried through
    ``count()``.

    The temporary file, its ``.log`` companion and the un-suffixed
    temporary name are removed when the function exits, whether it
    succeeds or raises.

    Arguments
    ---------
    infile : string
        Gene set handed to ``PipelineLncRNA.flagExonStatus``.
        NOTE(review): presumably a GTF file - confirm against
        PipelineLncRNA.
    outfile : string
        Path of the table to write, opened via ``iotools.openFile``.
    '''
    header = [
        "no_transcripts",
        "no_genes",
        "no_exons_per_transcript",
        "no_exons_per_gene",
        "no_single_exon_transcripts",
        "no_multi_exon_transcripts",
        "no_single_exon_genes",
        "no_multi_exon_genes",
    ]

    # Same order as `header`: one counter class per output column.
    counters = [
        PipelineLncRNA.CounterTranscripts,
        PipelineLncRNA.CounterGenes,
        PipelineLncRNA.CounterExonsPerTranscript,
        PipelineLncRNA.CounterExonsPerGene,
        PipelineLncRNA.CounterSingleExonTranscripts,
        PipelineLncRNA.CounterMultiExonTranscripts,
        PipelineLncRNA.CounterSingleExonGenes,
        PipelineLncRNA.CounterMultiExonGenes,
    ]

    tmpf = P.getTempFilename(".") + ".gz"
    try:
        # calculate exon status for refcoding genes.
        PipelineLncRNA.flagExonStatus(infile, tmpf)

        # Compute everything before touching the output so that a failing
        # counter does not leave a header-only file behind.
        values = [str(counter(tmpf).count()) for counter in counters]

        # The context manager guarantees the handle is flushed and closed.
        with iotools.openFile(outfile, "w") as outf:
            outf.write("\t".join(header) + "\n")
            outf.write("\t".join(values))
    finally:
        # Always clean up the temporary artefacts. The existence check
        # keeps a missing file (e.g. when flagExonStatus failed early)
        # from masking the original exception.
        for path in (tmpf, tmpf + ".log", P.snip(tmpf, ".gz")):
            if os.path.exists(path):
                os.unlink(path)