Exemple #1
0
def read_transcripts(genes_fn, seqnames):
    '''
    Parse all the transcripts from a GTF file.

    Rows are grouped by their 'transcript_id' attribute; every kept row is
    handed to the add_exon method of the transcript object for that id.

    Args:
        genes_fn: GTF file name, passed to gtf.gtf_file.
        seqnames: Container of sequence names to keep; rows whose seqname is
            not in it are skipped.

    Returns:
        A list with one transcript object per distinct transcript_id seen.
    '''

    stderr.write('parsing gtf ... ')

    genes = defaultdict(transcript)

    for row in gtf.gtf_file(genes_fn):
        if row.seqname not in seqnames: continue
        # A row without a transcript_id cannot be assigned to any transcript;
        # skip it rather than aborting the whole parse with a KeyError.
        if 'transcript_id' not in row.attributes: continue

        genes[row.attributes['transcript_id']].add_exon(row)

    stderr.write('done. ({0} transcripts)\n'.format(len(genes)))
    # Materialize a list so callers get the same type on Python 2 and 3.
    return list(genes.values())
Exemple #2
0
def read_genes(genes_fn):
    '''
    For each gene in the GTF file, compute the union of all exons, in sorted
    order.

    Only rows whose feature is 'exon' and that carry a 'gene_id' attribute are
    used. Each such row is added to the gene object for its gene_id, and
    flatten() is called on every gene once the whole file has been read.

    Args:
        genes_fn: GTF file name, passed to gtf.gtf_file.

    Returns:
        A defaultdict mapping gene_id to its (flattened) gene object.
    '''

    stderr.write('parsing GTF file ... ')

    genes = defaultdict(gene)

    for row in gtf.gtf_file(genes_fn):
        if row.feature != 'exon': continue
        if 'gene_id' not in row.attributes: continue

        genes[row.attributes['gene_id']].add_row(row)

    stderr.write('done. ({0} genes)\n'.format(len(genes)))

    # values() rather than the Python 2-only itervalues(), so this also runs
    # on Python 3.
    for g in genes.values():
        g.flatten()

    return genes
Exemple #3
0
#!/usr/bin/env python

from gtf import gtf_file
from sys import argv, stdout, stderr, stdin

# Without a GTF file argument there is nothing to read: print the usage
# message and stop, instead of falling through to an IndexError on argv[1].
if len(argv) < 2:
    stderr.write('usage: gtf_exons.py genes.gtf\n')
    raise SystemExit(1)

# Write one tab-separated line per exon row to stdout:
# seqname, start, end, gene_id, score (always 0), strand.
for row in gtf_file(argv[1]):
    if row.feature != 'exon': continue

    stdout.write('{seqname}\t{start}\t{end}\t{name}\t{score}\t{strand}\n'.format(
        seqname = row.seqname,
        # Start is shifted down by one; presumably converting 1-based GTF
        # coordinates to a 0-based, BED-style start -- confirm.
        start   = int(row.start) - 1,
        end     = row.end,
        name    = row.attributes['gene_id'],
        score   = 0,
        strand  = row.strand))