Exemple #1
0
    def isWellFormed(self,seq):
        """Report whether this transcript is structurally sound.

        Usage: if(transcript.isWellFormed(sequence)) ...

        Two checks are made, in order:
          1. No exon may overlap the exon immediately preceding it in
             self.exons (only adjacent pairs are compared).
          2. The last codon returned by CodonIterator.getAllCodons() for
             this transcript must be a stop codon, i.e. its triplet must
             map to a true value in self.stopCodons.  Per the original
             documentation, iteration starts at the start codon (attribute
             startCodon, an offset within the transcript not counting
             intron bases) and continues until an in-frame stop codon or
             the end of the transcript is reached.

        Parameters:
          seq -- the sequence handed to CodonIterator.

        Returns:
          True if both checks pass, False otherwise.  The result is always
          a bool (the overlap check previously returned the integer 0).
        """
        stopCodons=self.stopCodons

        # 1. Check whether any exons overlap each other
        exons=self.exons
        if(any(exons[i].overlaps(exons[i-1]) for i in range(1,len(exons)))):
            return False

        # 2. Check that there is an in-frame stop-codon
        codons=CodonIterator(self,seq,stopCodons).getAllCodons()
        if(len(codons)==0): return False
        lastCodon=codons[-1]
        return bool(stopCodons.get(lastCodon.triplet,None))
Exemple #2
0
 def trimUTR(self,axisSequenceRef):
     """Trim this transcript's exons down to the coding region.

     Step 1 removes everything before the start codon: leading exons
     whose length is <= the remaining startCodon offset are deleted, and
     the first exon that extends past the offset has its initial portion
     trimmed.  self.startCodon is then reset to 0.

     Step 2 walks the codons with CodonIterator; at the first codon whose
     triplet is in self.stopCodons, the exon holding the end of that
     codon is trimmed and every later exon is deleted.

     If no stop codon is found, the three bases just beyond the last exon
     are inspected and a warning is printed unless they spell TAG, TAA or
     TGA.  Finally recomputeBoundaries() is called.

     Parameters:
       axisSequenceRef -- sequence passed to CodonIterator and sliced
                          with absolute coordinates when looking for a
                          stop codon beyond the last exon.

     Raises:
       Exception -- if self.startCodon is None, or if a stop codon is
                    split across exons and getSuccessor() returns nothing.
     """
     self.adjustOrders()
     stopCodons=self.stopCodons
     strand=self.strand
     numExons=self.numExons()
     startCodon=self.startCodon
     # An offset of 0 is a valid start codon position (this method itself
     # sets self.startCodon=0), so only None counts as "not set".
     if(startCodon is None):
         raise Exception("can't trim UTR, because startCodon is not set")

     # Remove the portion of the transcript preceding the start codon
     j=0
     while(j<numExons):
         exon=self.getIthExon(j)
         length=exon.getLength()
         if(length<=startCodon):
             # Exon lies entirely before the start codon.  j is not
             # advanced: the original's "j-=1" implies deleteExon(j)
             # shifts the following exons down by one position.
             self.deleteExon(j)
             numExons-=1
             startCodon-=length
             self.adjustOrders() ### 4/1/03
         else:
             # NOTE(review): the original marked the minus-strand call
             # with "???"; the same call is made on both strands, so
             # trimInitialPortion is presumably strand-aware -- confirm.
             exon.trimInitialPortion(startCodon)
             if(strand=="+"): self.begin=exon.begin
             else: self.end=exon.end
             exon.type="initial-exon" if numExons>1 else "single-exon"
             self.startCodon=0
             break

     # Find in-frame stop codon
     codonIterator=CodonIterator(self,axisSequenceRef,stopCodons)
     stopCodonFound=False
     while(True):
         codon=codonIterator.nextCodon()
         if(not codon): break
         if(not stopCodons.get(codon.triplet,None)): continue
         exon=codon.exon
         coord=codon.absoluteCoord
         # Number of bases in this exon that follow the stop codon;
         # negative when the codon runs over into the next exon.
         if(strand=="+"): trimBases=exon.end-coord-3
         else: trimBases=coord-exon.begin-3
         if(trimBases>=0):
             exon.trimFinalPortion(trimBases)
             exon.type="single-exon" if exon.order==0 else "final-exon"
             finalExon=exon
         else: # codon is interrupted; trim the next exon
             nextExon=self.getSuccessor(exon)
             if(not nextExon):
                 raise Exception("exon successor not found")
             nextExon.trimFinalPortion(nextExon.getLength()+trimBases)
             nextExon.type="final-exon"
             finalExon=nextExon
         # Discard every exon after the one containing the stop codon
         j=numExons-1
         while(j>finalExon.order):
             self.deleteExon(j)
             j-=1
         stopCodonFound=True
         break

     if(not stopCodonFound):
         ### sometimes the GFF coords don't include the stop codon...
         numExons=self.numExons()
         lastExon=self.getIthExon(numExons-1)
         seq=axisSequenceRef
         if(strand=="+"):
             stopCodonBegin=lastExon.getEnd()
             stopCodon=seq[stopCodonBegin:stopCodonBegin+3]
         else: # strand="-"
             stopCodonBegin=lastExon.getBegin()-3
             if(stopCodonBegin>=0):
                 stopCodon=Translation.reverseComplement(
                     seq[stopCodonBegin:stopCodonBegin+3])
             else:
                 # Fewer than 3 bases precede the exon; a negative slice
                 # start would wrap around to the end of the sequence.
                 stopCodon=""
         if(stopCodon not in ("TAG","TAA","TGA")):
             print("Warning!  No stop codon found for",
                   self.transcriptId,self.strand,
                   "strand , unable to trim UTR")
     self.recomputeBoundaries()