Esempio n. 1
0
 def inferUTRort(self):
     '''
     Split the annotated UTR intervals in self.utr into self.fp_utr and
     self.tp_utr by comparing each UTR start with the start of the first
     CDS interval (after self.cds has been passed through sortArr(..., 0)).

     On the "+" strand a UTR starting before that CDS start goes to fp_utr
     and every other UTR goes to tp_utr; on any other strand value the two
     targets are swapped.

     Returns None when self.utr is empty, True otherwise.
     '''
     if not self.utr:
         return
     self.cds = sortArr(self.cds, 0)
     # Only the start of the first CDS interval is used as the pivot.
     cds_anchor, _ = self.cds[0]
     on_plus = self.strand == "+"
     for utr_start, utr_end in self.utr:
         lies_before_cds = utr_start < cds_anchor
         # "before the CDS" means 5' on "+" and 3' otherwise.
         if lies_before_cds == on_plus:
             bucket = self.fp_utr
         else:
             bucket = self.tp_utr
         bucket.append([utr_start, utr_end])
     self.fp_utr = sortArr(self.fp_utr, 0)
     self.tp_utr = sortArr(self.tp_utr, 0)
     return True
Esempio n. 2
0
 def inferUTR(self):
     '''
     Infer self.fp_utr and self.tp_utr from exon and CDS coordinates, so
     that no explicit UTR annotation is needed for the transcript.

     Nothing is done (and None is returned) when both UTR lists are already
     populated or when the transcript has no CDS. Otherwise self.cds and
     self.exons are re-sorted with sortArr(..., 0), every exon is compared
     against the outer CDS boundaries, the UTR lists are passed through
     rmExtra and sortArr, and True is returned.

     The 5' and 3' sides are tested independently, so an exon that spans
     both CDS boundaries (e.g. a single-exon transcript) contributes a
     5' UTR piece and a 3' UTR piece.
     '''
     if self.fp_utr and self.tp_utr:
         return
     if not self.cds:
         return
     self.cds = sortArr(self.cds,0)
     self.exons = sortArr(self.exons,0)
     plus = self.strand == "+"
     # CDSstart is the translation start in transcript orientation: the lowest
     # CDS coordinate on "+", the highest one otherwise. CDSend is the opposite.
     if plus:
         CDSstart, CDSend = self.cds[0][0], self.cds[-1][-1]
     else:
         CDSstart, CDSend = self.cds[-1][-1], self.cds[0][0]
     for tmps,tmpe in self.exons:
         if plus:
             # 5' side: exon entirely before the CDS, or straddling its start.
             if tmpe < CDSstart:
                 self.fp_utr.append([tmps,tmpe])
             elif tmps < CDSstart < tmpe:
                 self.fp_utr.append([tmps,CDSstart])
             # 3' side: exon entirely after the CDS, or straddling its end.
             if tmps > CDSend:
                 self.tp_utr.append([tmps,tmpe])
             elif tmps < CDSend < tmpe:
                 self.tp_utr.append([CDSend,tmpe])
         else:
             # 5' side sits at the high-coordinate end on the other strand.
             if tmps > CDSstart:
                 self.fp_utr.append([tmps,tmpe])
             elif tmps < CDSstart < tmpe:
                 self.fp_utr.append([CDSstart,tmpe])
             # 3' side sits at the low-coordinate end.
             if tmpe < CDSend:
                 self.tp_utr.append([tmps,tmpe])
             elif tmps < CDSend < tmpe:
                 self.tp_utr.append([tmps,CDSend])
         # Exons matching none of the tests above contribute no UTR.
     self.fp_utr = sortArr(rmExtra(self.fp_utr),0)
     self.tp_utr = sortArr(rmExtra(self.tp_utr),0)
     return True
Esempio n. 3
0
 def inferIntron(self):
     '''
     Rebuild self.introns from the gaps between consecutive intervals.

     self.exons is used when it is non-empty, otherwise self.cds. Each
     intron is stored as [end of previous interval, start of next interval],
     and the resulting list is passed through sortArr(..., 0).

     Returns None (leaving self.introns empty) when neither exons nor CDS
     intervals are available, True otherwise.
     '''
     self.introns = []
     intervals = self.exons or self.cds
     if not intervals:
         return
     # Intervals are [start, end] pairs; pair each one with its successor.
     # NOTE(review): assumes the intervals are already in coordinate order - confirm with callers.
     self.introns = [[prev[1], nxt[0]]
                     for prev, nxt in zip(intervals, intervals[1:])]
     self.introns = sortArr(self.introns,0)
     return True
Esempio n. 4
0
class transcript(BaseFeature):
    """
    An mRNA acts like an isoform in that it is associated with a parent gene
    and contains a number of coding sequences (CDS).

    Exon, CDS and UTR intervals are kept as [start, end] lists; the *Map
    dictionaries are keyed by (start, end) tuples and are used to reject
    duplicate intervals.
    """
    def __init__(self, chromosome, start, end, strand, feature, id, attr=None):
        """
        Create an empty transcript record.

        attr is the attribute dictionary of the record. When omitted, a
        fresh dict is created for this instance rather than sharing one
        default object between instances. biotype is read from
        attr['biotype'] and is None when that key is absent.
        """
        if attr is None:
            attr = {}
        BaseFeature.__init__(self, chromosome, start, end, strand, feature, attr)
        self.id          = id
        self.exons       = []
        self.biotype     = attr.get('biotype')
        self.feature     = feature
        self.cds         = []
        self.cdsMap      = {}
        self.start_codon = None
        self.stop_codon  = None
        self.utr         = []
        self.fp_utr      = []
        self.tp_utr      = []
        self.utrMap      = {}
        self.exonMap     = {}
        self.introns     = []
        self.attrs       = attr
        self.length      = self.end - self.start

    def addexon(self,exon):
        """
        Register an exon on this transcript.

        Returns False when an exon with the same (start, end) is already
        registered, True after adding a new one. Raises Exception when the
        exon's strand or chromosome differs from the transcript's.
        """
        if exon.strand != self.strand:
            raise Exception("ERROR: strand '%s' of exon from transcript %s does not match gene strand '%s'" % (exon.strand, exon.parent, self.strand))
        if exon.chromosome != self.chromosome:
            raise Exception("ERROR: chromosome '%s' of exon from transcript %s does not match gene chromosome '%s'" % (exon.chromosome, exon.parent, self.chromosome))
        exonTuple = (exon.start,exon.end)
        if exonTuple in self.exonMap:
            # Same coordinates already registered: ignore the duplicate.
            return False
        self.exonMap[exonTuple] = exon
        self.exons.append(list(exonTuple))
        self.exons = sortArr(self.exons,0,1)
        return True
Esempio n. 5
0
 def getCDSs(self):
     '''
     Split self.exons into CDS and UTR intervals using self.CDSStart and
     self.CDSEnd, appending the pieces to self.cds and self.utr.

     Returns None without touching anything when either CDS status is
     'unk'; otherwise self.cds is passed through sortArr(..., 0) and True
     is returned.
     '''
     # Compare by value: identity ("is") on strings only works by accident of interning.
     if self.CDSStartStat == 'unk' or self.CDSEndStat == 'unk':
         # In this case CDSStart is equal to CDSEnd, so we do not try to infer the CDS boundary.
         return
     for exonStart,exonEnd in self.exons:
         if exonEnd < self.CDSStart:
             # Exon lies entirely before the CDS.
             self.utr.append([exonStart,exonEnd])
         elif exonStart < self.CDSStart < exonEnd:
             # Exon straddles CDSStart; on the non-"+" strand the UTR end is shifted by 3.
             # NOTE(review): an exon that also contains CDSEnd is handled only by this branch - confirm that is intended.
             tmpStart = self.CDSStart if self.strand=='+' else self.CDSStart -3
             self.utr.append([exonStart,tmpStart])
             self.cds.append([self.CDSStart,exonEnd])
         elif self.CDSStart < exonEnd < self.CDSEnd:
             # Exon end falls strictly inside the CDS: the whole exon is coding.
             self.cds.append([exonStart,exonEnd])
         elif exonStart < self.CDSEnd < exonEnd:
             # Exon straddles CDSEnd; on the "+" strand the UTR start is shifted by 3.
             tmpEnd = self.CDSEnd+3 if self.strand=='+' else self.CDSEnd
             self.utr.append([tmpEnd,exonEnd])
             self.cds.append([exonStart,self.CDSEnd])
         else:
             self.utr.append([exonStart,exonEnd])
     self.cds = sortArr(self.cds,0)
     return True
Esempio n. 6
0
        self.exons = sortArr(self.exons,0,1)
        return True

    def addCDS(self, cds):
        """
        Register a CDS interval on this transcript.

        Returns False when a CDS with the same (start, end) is already
        registered, True after adding a new one. Raises Exception when the
        CDS strand or chromosome differs from the transcript's.
        """
        if cds.strand != self.strand:
            raise Exception("ERROR: strand '%s' of CDS from transcript %s does not match gene strand '%s'" % (cds.strand, cds.parent, self.strand))
        if cds.chromosome != self.chromosome:
            raise Exception("ERROR: chromosome '%s' of CDS from transcript %s does not match gene chromosome '%s'" % (cds.chromosome, cds.parent, self.chromosome))
        cdsTuple = (cds.start,cds.end)
        if cdsTuple in self.cdsMap:
            # Same coordinates already registered: ignore the duplicate.
            return False
        self.cdsMap[cdsTuple] = cds
        self.cds.append(list(cdsTuple))
        self.cds = sortArr(self.cds,0,1)
        return True
    
    def addUTR(self,utr):
        """
        Register a UTR interval on this transcript.

        Returns False when a UTR with the same (start, end) is already
        registered, True after adding a new one. Raises Exception when the
        UTR strand or chromosome differs from the transcript's.
        """
        # The messages are built from the utr argument itself; there is no
        # `cds` name in this method's scope.
        if utr.strand != self.strand:
            raise Exception("ERROR: strand '%s' of UTR from transcript %s does not match gene strand '%s'" % (utr.strand, utr.parent, self.strand))
        if utr.chromosome != self.chromosome :
            raise Exception("ERROR: chromosome '%s' of UTR from transcript %s does not match gene chromosome '%s'" % (utr.chromosome, utr.parent, self.chromosome))
        utrTuple = (utr.start,utr.end)
        if utrTuple in self.utrMap:
            # Same coordinates already registered: ignore the duplicate.
            return False
        self.utrMap[utrTuple] = utr
        self.utr.append(list(utrTuple))
        return True
Esempio n. 7
0
 def getIntrons(self):
     '''
     Append one intron per pair of consecutive exons to self.introns.

     Each intron is [end of exon k, start of exon k+1 minus 1], read from
     self.txExonsEnd and self.txExonsStart for self.txExonCount exons. The
     list is then passed through sortArr(..., 0). Always returns True.
     '''
     for nxt in range(1, self.txExonCount):
         gap_start = self.txExonsEnd[nxt - 1]
         gap_end = self.txExonsStart[nxt] - 1
         self.introns.append([gap_start, gap_end])
     self.introns = sortArr(self.introns, 0)
     return True
Esempio n. 8
0
# Ad-hoc scratch script (Python 2 print statements) that pokes at bamio,
# utils.sortArr and fasta_itr.
# NOTE(review): `sys` and `utils` are used below, but their imports are not
# visible in this excerpt - confirm they are imported earlier in the file.
#print a
#print b

# Assuming `sys` is the standard module, the script exits here with status 0,
# so none of the statements below are ever executed.
sys.exit(0)
import bamio
# The triple-quoted block below is a bare string literal, i.e. a disabled
# Tabix fetch / mappingstat experiment; it has no effect at runtime.
'''
idx=bamio.Tabix(sys.argv[1])

for item in idx.fetch('1',3100,5000):
        print item
        idx.close()
        a,b,c,d,e,f,g=bamio.mappingstat(sys.argv[1])
        print a,b,c,d,e,f,g
        #matplot.densityplot([a1,a2,a3,a4,a5,a6,a7,a8],['s','e','f','g','h','a','b','c'])
        '''

# Sample rows used to try out utils.sortArr with the arguments 0 and 1.
arr = [['1', 42, 52], ['11', 45, 78], ['2', 25, 100], ['1', 23, 78],
       ['1', 56, 89]]

print utils.sortArr(arr, 0, 1)
# presumably provides fasta_itr via the star import - verify against the format module
from format import *

# The first command-line argument is handed to fasta_itr.
f = sys.argv[1]
recs = fasta_itr(f)
print dir(recs)
print type(recs)

# Print the id and seq attributes of every record yielded by fasta_itr.
for rec in recs:
    print rec.id
    print rec.seq