def __init__(self, donor, acceptor, shared_nts, gff={}):
    """
    Initialize an intron from a donor and an acceptor splice site.

    @type  donor: object
    @param donor: donor splice site; its `pos` and `phase` attributes are read

    @type  acceptor: object
    @param acceptor: acceptor splice site; its `pos` and `phase` attributes are read

    @type  shared_nts: string
    @param shared_nts: shared nucleotides; when non-empty they are translated
                       with dna2prot.dna2protein and stored as `shared_aa`

    @type  gff: dictionary
    @param gff: optional values that override the default GFF fields
                (only read here, never modified)

    @raise IncompatibleSpliceSitePhases: both sites specify a phase and
                                         the two phases differ
    """
    self.donor = donor
    self.acceptor = acceptor
    self.length = acceptor.pos - donor.pos
    self.start = donor.pos
    self.end = acceptor.pos
    self.phase = None
    self.nt_from_stop_donor = None
    self.nt_from_stop_acceptor = None

    # check/set phase of the intron.
    # Compare against None instead of relying on truthiness: phase 0 is a
    # valid phase, and a 0 / non-zero combination is a mismatch that must
    # be reported, not silently treated as "no phase specified".
    donor_phase = self.donor.phase
    acceptor_phase = self.acceptor.phase
    if donor_phase is not None and acceptor_phase is not None:
        if donor_phase != acceptor_phase:
            raise IncompatibleSpliceSitePhases
        self.phase = donor_phase
    else:
        # hmm... not recommended, but no phase specified on donor/acceptor;
        # self.phase stays None
        pass

    # set/update gff data
    # NOTE(review): the key 'fscource' looks like a misspelling of 'fsource';
    # it is kept unchanged because other code may read it -- confirm.
    self._gff = {
        'fref': None,
        'fscource': 'undefined',
        'fstrand': '+',
        'fscore': ".",
        'column9data': {},
    }
    self._gff.update(gff)

    # and set some shared nt/aa data
    self.shared_nts = shared_nts
    self.shared_aa = ""
    if self.shared_nts:
        self.shared_aa = dna2prot.dna2protein(self.shared_nts)
def dna2protein(_sequence, _frame=0):
    """
    Translate a DNA sequence into AAs for a given frame

    The sequence is cut to start at offset `_frame` and to end on a
    complete codon before it is handed to dna2prot.dna2protein.

    @type  _sequence: string
    @param _sequence: DNA sequence string

    @type  _frame: integer
    @param _frame: desired reading frame (0,1,2); defaults to 0 so the
                   function can be called with only a sequence

    @rtype:  string
    @return: Protein sequence
    """
    # number of trailing nucleotides that do not form a complete codon
    trailing = (len(_sequence) - _frame) % 3
    # a negative slice end drops the incomplete trailing codon
    if trailing:
        stop = -trailing
    else:
        stop = len(_sequence)
    # and do the actual translation
    return dna2prot.dna2protein(_sequence[_frame:stop])
def _get_reverse_strand_coords_and_seqs(dnaseq, start, end):
    """
    Widen the region start..end in 3-nt steps until a codon listed in
    REV_STOP_CODONS (or the edge of the sequence) is met on each side,
    then return the reverse complement of that region and its translation.

    @type  dnaseq: string
    @param dnaseq: DNA sequence string

    @type  start: integer
    @param start: start offset of the region in dnaseq

    @type  end: integer
    @param end: end offset of the region in dnaseq

    @rtype:  tuple
    @return: (start,end,dnarevseq,protseq); start and end are the adjusted
             offsets that were used to slice dnaseq
    """
    seqlen = len(dnaseq)

    # downstream side: walk towards the end of the sequence, but stay at
    # least 6 nt away from it because 3 more nt are added after the scan
    if end < seqlen - 6:
        while end < seqlen - 6 and \
                dnaseq[end:end + 3].upper() not in REV_STOP_CODONS:
            end += 3
    else:
        # offset is already (nearly) at the end of the sequence; step back
        # until one more codon still fits
        while end >= seqlen - 3:
            end -= 3

    # upstream side: mirror image of the scan above
    if start >= 6:
        while start >= 6 and \
                dnaseq[start - 3:start].upper() not in REV_STOP_CODONS:
            start -= 3
    else:
        # offset is already (nearly) at the start of the sequence; step
        # forward until one more codon still fits
        while start < 3:
            start += 3

    # include one more codon on both sides
    start -= 3
    end += 3

    # DNA & protein sequence on the reverse strand
    dnarevseq = _reversecomplement(dnaseq[start:end])
    protseq = dna2protein(dnarevseq)

    return (start, end, dnarevseq, protseq)
def _get_reverse_strand_coords_and_seqs(dnaseq,start,end):
    """
    Extend the given coordinates outwards, codon by codon, up to the first
    codon found in REV_STOP_CODONS or up to the sequence boundary, and
    return the resulting coordinates with the reverse-complemented DNA
    slice and its protein translation.

    @type  dnaseq: string
    @param dnaseq: DNA sequence string

    @type  start: integer
    @param start: start offset in dnaseq

    @type  end: integer
    @param end: end offset in dnaseq

    @rtype:  tuple
    @return: (start,end,dnarevseq,protseq)
    """
    total = len(dnaseq)
    left = start
    right = end

    # right-hand boundary; 3 nt are added afterwards, hence the -6 limit
    if right >= total-6:
        # already close to the end of the sequence: move back in codon
        # steps until a 3 nt extension stays within reach
        while right >= total-3:
            right = right - 3
    else:
        while right < total-6:
            codon = dnaseq[right:right+3].upper()
            if codon in REV_STOP_CODONS:
                break
            right = right + 3

    # left-hand boundary; 3 nt are subtracted afterwards, hence the 6 limit
    if left < 6:
        # already close to the start of the sequence: move forward in
        # codon steps until a 3 nt extension stays within reach
        while left < 3:
            left = left + 3
    else:
        while left >= 6:
            codon = dnaseq[left-3:left].upper()
            if codon in REV_STOP_CODONS:
                break
            left = left - 3

    # take one extra codon on either side and build the sequences
    left = left - 3
    right = right + 3
    dnarevseq = _reversecomplement(dnaseq[left:right])
    protseq = dna2protein(dnarevseq)

    # return coords & sequences
    return (left,right,dnarevseq,protseq)
def __init__(self,donor,acceptor,shared_nts,gff={}):
    """
    Initialize the object from a donor and an acceptor splice site.

    @type  donor: object
    @param donor: donor splice site; checked with IsDonor()

    @type  acceptor: object
    @param acceptor: acceptor splice site; checked with IsAcceptor()

    @type  shared_nts: string
    @param shared_nts: shared nucleotides; when non-empty they are translated
                       with dna2prot.dna2protein and stored as `shared_aa`

    @type  gff: dictionary
    @param gff: optional values merged into the GFF data set up by BasicGFF
    """
    # validate the splice site arguments before anything is stored
    # (presumably these raise on invalid input -- defined elsewhere)
    IsDonor(donor)
    IsAcceptor(acceptor)

    # base class initialization, then apply the caller's gff overrides
    BasicGFF.__init__(self)
    self._gff.update(gff)

    # store both splice sites and derive the remaining attributes from them
    self.donor, self.acceptor = donor, acceptor
    self._init_by_splicesites()

    # forward compatibility with IntronConnectingOrfs
    self.nt_from_stop_donor = self.nt_from_stop_acceptor = None

    # shared nt data and, when any nt are given, their translation
    self.shared_nts = shared_nts
    self.shared_aa = ""
    if not self.shared_nts:
        return
    self.shared_aa = dna2prot.dna2protein(self.shared_nts)
if not exons: for track in input[k]['gff-gene']: print track for warn in input[k]['warnings']: print "\t", warn print "" # check if the DNA exon structure equals the given protein sequence (None|True|False) from dna2prot import dna2protein print "# test if annotated gene model equals provided protein sequence in AbfgpGeneLocusDirectory" for k in input.keys(): exons = input[k]['gldobj'].as_exons() seqp = [] for item in exons: if item.__class__.__name__.find('Exon') >= 0: seqp.append( item.dnasequence() ) _trans = dna2protein("".join(seqp)) _prot = input[k]['proteinseq'].replace('*','') if not _prot: print None, "\t", k continue else: print _trans == _prot, "\t", k if dna2protein("".join(seqp))!=input[k]['proteinseq'].replace('*',''): translated = dna2protein("".join(seqp)) for offset in range(0,len(input[k]['proteinseq']),100): _trans = translated[offset:offset+100] _prot = input[k]['proteinseq'][offset:offset+100].replace("*","") print "prot:", input[k]['proteinseq'][offset:offset+100] print "gene:", translated[offset:offset+100], print _trans == _prot