# Example no. 1
# 0
def get_transcripts_at_interval(chrom, start, end, strand, exon_trees):
    """Collect strand-compatible transcripts whose exons overlap an interval.

    Queries ``exon_trees[chrom].find(start, end)`` and treats each hit's
    ``value`` as a sequence of ``(transcript, exon_num)`` pairs.

    Returns a list of the transcript objects for which
    ``cmp_strand(transcript.strand, strand)`` is true.  One entry is added
    per matching pair; no de-duplication is performed.
    """
    features = []
    for hit in exon_trees[chrom].find(start, end):
        # The entries are (transcript, exon_num) pairs, so the pair has to be
        # unpacked before reading ``.strand``; filtering the raw pairs would
        # fail with AttributeError on the tuple.
        features.extend(g for g, _exon_num in hit.value
                        if cmp_strand(g.strand, strand))
    return features
def get_transcripts_at_interval(chrom, start, end, strand, exon_trees):
    """Return transcripts with an exon hit in ``[start, end)`` on a compatible strand.

    ``exon_trees[chrom].find(start, end)`` supplies the hits; each hit's
    ``value`` is iterated as ``(transcript, exon_num)`` pairs.  A transcript
    is appended once for every pair whose transcript satisfies
    ``cmp_strand(transcript.strand, strand)``; results are not de-duplicated.
    """
    features = []
    for hit in exon_trees[chrom].find(start, end):
        for g, _exon_num in hit.value:
            # Test the unpacked transcript itself.  Re-scanning the whole
            # pair list here would both repeat entries and look up
            # ``.strand`` on a tuple.
            if cmp_strand(g.strand, strand):
                features.append(g)
    return features
# Example no. 3
# 0
def get_genes_at_interval(chrom, start, end, strand, trees):
    """Return the genes overlapping an interval on a compatible strand.

    Every hit from ``trees[chrom].find(start, end)`` carries a collection of
    gene objects in ``value``; a gene is kept when
    ``cmp_strand(gene.strand, strand)`` is true.
    """
    matches = []
    for node in trees[chrom].find(start, end):
        # TODO: debug strand-specific libraries such that antisense reads
        # are not counted as part of overlapping sense genes. This should
        # work but needs testing.
        for gene in node.value:
            if cmp_strand(gene.strand, strand):
                matches.append(gene)
    return matches
def get_genes_at_interval(chrom, start, end, strand, trees):
    """List strand-compatible genes found at ``chrom:start-end``.

    Hits come from ``trees[chrom].find(start, end)``; each hit's ``value``
    is scanned and the entries passing ``cmp_strand(entry.strand, strand)``
    are returned in encounter order.
    """
    # TODO: debug strand-specific libraries such that antisense reads are
    # not counted as part of overlapping sense genes. This should work but
    # needs testing.
    return [
        gene
        for overlap in trees[chrom].find(start, end)
        for gene in overlap.value
        if cmp_strand(gene.strand, strand)
    ]
def get_transcripts_at_interval(chrom, start, end, strand, exon_intervals, exon_trees):
    """Project a genomic interval onto each strand-compatible exon it hits.

    Hits are taken from ``exon_trees[chrom].find(start, end)``; each hit's
    ``value`` is iterated as ``(transcript, exon_num)`` pairs, where
    ``exon_num`` indexes ``transcript.exons`` (a sequence of
    ``(start, end)`` pairs).  Pairs whose transcript fails
    ``cmp_strand(transcript.strand, strand)`` are skipped.

    ``exon_intervals`` is accepted for interface compatibility but is not
    used by this function.

    Returns a list of ``TranscriptAlignment`` objects, one per retained
    pair, carrying:

    * ``e_start`` / ``e_end``: the interval clipped to the exon, relative
      to the exon start;
    * ``e_start_overhang`` / ``e_end_overhang``: how far the interval
      extends past the exon on either side (never negative);
    * ``tx_start`` / ``tx_end``: coordinates in the concatenated-exon
      space, mirrored against the total exon length when
      ``transcript.strand == NEG_STRAND``.
    """
    txsegs = []
    for hit in exon_trees[chrom].find(start, end):
        for g, exon_num in hit.value:
            # strand must be compatible; check before touching the exon
            # table so incompatible entries cost nothing
            if not cmp_strand(g.strand, strand):
                continue
            # get exon coordinates
            g_exon_start, g_exon_end = g.exons[exon_num]
            e_start = max(start, g_exon_start) - g_exon_start
            e_end = min(end, g_exon_end) - g_exon_start
            e_start_overhang = max(0, g_exon_start - start)
            e_end_overhang = max(0, end - g_exon_end)
            # get transcript coordinates: total length of the exons listed
            # before this one, plus the offset inside this exon.  Distinct
            # loop names keep the ``start``/``end`` parameters unshadowed.
            preceding_length = sum(ex_end - ex_start
                                   for ex_start, ex_end in g.exons[:exon_num])
            tx_start = preceding_length + e_start
            # NOTE(review): this spans the full interval length, not the
            # clipped length (e_end - e_start), so tx_end includes any
            # overhang -- confirm that is intended.
            tx_end = tx_start + (end - start)
            # convert to negative strand if necessary
            if g.strand == NEG_STRAND:
                tx_length = sum(ex_end - ex_start
                                for ex_start, ex_end in g.exons)
                tx_end, tx_start = (tx_length - tx_start, tx_length - tx_end)
                # NOTE(review): for a 0-based index the mirrored position
                # would be len(g.exons) - exon_num - 1; kept as-is because
                # downstream expectations are not visible here -- confirm.
                exon_num = len(g.exons) - exon_num
            txsegs.append(TranscriptAlignment(g=g,
                                              exon_num=exon_num,
                                              chrom=chrom,
                                              start=start,
                                              end=end,
                                              e_start=e_start,
                                              e_end=e_end,
                                              e_start_overhang=e_start_overhang,
                                              e_end_overhang=e_end_overhang,
                                              tx_start=tx_start,
                                              tx_end=tx_end))
    return txsegs