Esempio n. 1
0
    def __init__(self, donor, acceptor, shared_nts, gff=None):
        """
        Initialize an intron from its donor and acceptor splice site objects

        @type  donor: splice site object
        @param donor: donor site; its `pos` and `phase` attributes are read

        @type  acceptor: splice site object
        @param acceptor: acceptor site; its `pos` and `phase` attributes are read

        @type  shared_nts: string
        @param shared_nts: nucleotide string stored on the intron; when
                           non-empty it is translated into `shared_aa`

        @type  gff: dict
        @param gff: optional values that override the default gff data

        @raise IncompatibleSpliceSitePhases: both sites carry a phase and
                                             the two phases differ
        """
        self.donor = donor
        self.acceptor = acceptor
        self.length = acceptor.pos - donor.pos
        self.start = donor.pos
        self.end = acceptor.pos
        self.phase = None
        self.nt_from_stop_donor = None
        self.nt_from_stop_acceptor = None

        # check/set phase of the intron
        donor_phase = self.donor.phase
        acceptor_phase = self.acceptor.phase
        # phase 0 is falsy, so a plain truth test would treat it as "not
        # specified" and let a 0-versus-1/2 mismatch slip through unnoticed
        donor_has_phase = bool(donor_phase) or donor_phase == 0
        acceptor_has_phase = bool(acceptor_phase) or acceptor_phase == 0
        if donor_has_phase and acceptor_has_phase:
            if donor_phase != acceptor_phase:
                raise IncompatibleSpliceSitePhases
            self.phase = donor_phase
        # otherwise: not recommended, but no phase is specified on the
        # donor and/or acceptor; self.phase stays None

        # set/update gff data
        # NOTE(review): 'fscource' looks like a typo for 'fsource'; the key is
        # kept unchanged because other code may read it -- confirm
        self._gff = {
            'fref': None,
            'fscource': 'undefined',
            'fstrand': '+',
            'fscore': ".",
            'column9data': {},
        }
        if gff:
            self._gff.update(gff)

        # and set some shared nt/aa data
        self.shared_nts = shared_nts
        self.shared_aa = ""
        if self.shared_nts:
            self.shared_aa = dna2prot.dna2protein(self.shared_nts)
Esempio n. 2
0
def dna2protein(_sequence,_frame):
    """
    Translate the in-frame part of a DNA sequence into amino acids

    Reading starts at offset `_frame`; trailing nucleotides that do not
    fill a complete codon are cut off before the translation is done.

    @type  _sequence: string
    @param _sequence: DNA sequence string

    @type  _frame: integer
    @param _frame: desired reading frame (0,1,2)

    @rtype:  string
    @return: Protein sequence
    """
    # nucleotides left over behind the last complete codon
    overhang = (len(_sequence) - _frame) % 3
    # a negative slice bound trims the overhang; without overhang the
    # slice simply runs to the end of the sequence
    stop = -overhang if overhang else len(_sequence)
    return dna2prot.dna2protein(_sequence[_frame:stop])
Esempio n. 3
0
def _get_reverse_strand_coords_and_seqs(dnaseq, start, end):
    """
    Get coordinates and sequences of the reverse strand region around
    the given start/end offsets

    `end` is moved forward and `start` backward in steps of 3 nt until a
    codon listed in REV_STOP_CODONS is met or the edge of the sequence
    comes within reach; both are then widened by one more 3 nt step.

    @type  dnaseq: string
    @param dnaseq: DNA sequence string

    @type  start: integer
    @param start: start offset in dnaseq

    @type  end: integer
    @param end: end offset in dnaseq

    @rtype:  tuple
    @return: (start,end,dnarevseq,protseq); the adjusted offsets, the
             result of _reversecomplement on dnaseq[start:end] and the
             result of dna2protein on that sequence
    """
    seqlen = len(dnaseq)

    # the end offset gets +3 afterwards, so scanning halts 6 nt before
    # the end of the sequence
    if end < seqlen - 6:
        # walk forward codon by codon up to the first listed stop codon
        while (end < seqlen - 6 and
               dnaseq[end:end + 3].upper() not in REV_STOP_CODONS):
            end += 3
    else:
        # already too close to the end of the sequence; step back until
        # the final +3 fits
        while end >= seqlen - 3:
            end -= 3

    # the start offset gets -3 afterwards, so scanning halts at offset 6
    if start >= 6:
        # walk backward codon by codon up to the first listed stop codon
        while (start >= 6 and
               dnaseq[start - 3:start].upper() not in REV_STOP_CODONS):
            start -= 3
    else:
        # already too close to the beginning of the sequence; step
        # forward until the final -3 fits
        while start < 3:
            start += 3

    # widen by one codon on both sides and get DNA & protein sequences
    start, end = start - 3, end + 3
    dnarevseq = _reversecomplement(dnaseq[start:end])
    return (start, end, dnarevseq, dna2protein(dnarevseq))
Esempio n. 4
0
def _get_reverse_strand_coords_and_seqs(dnaseq,start,end):
    """
    Extend start/end to the surrounding codons listed in REV_STOP_CODONS
    and return the coordinates with the reverse strand sequences

    Both offsets are shifted in 3 nt steps; the search is halted near the
    edges of the sequence so that the closing 3 nt widening on each side
    can still be applied.

    @type  dnaseq: string
    @param dnaseq: DNA sequence string

    @type  start: integer
    @param start: start offset in dnaseq

    @type  end: integer
    @param end: end offset in dnaseq

    @rtype:  tuple
    @return: (start,end,dnarevseq,protseq); adjusted offsets, the output
             of _reversecomplement for dnaseq[start:end] and the output of
             dna2protein for that sequence
    """
    total = len(dnaseq)
    upper_limit = total - 6

    # right-hand side: +3 is added afterwards, hence the -6 limit
    if end >= upper_limit:
        # too close to the end of the sequence: shift back into reach
        while end >= total - 3:
            end -= 3
    else:
        # advance per codon until a listed stop codon or the limit
        while end < upper_limit and not (
                dnaseq[end:end+3].upper() in REV_STOP_CODONS):
            end += 3

    # left-hand side: -3 is subtracted afterwards, hence the +6 limit
    if start < 6:
        # too close to the beginning of the sequence: shift into reach
        while start < 3:
            start += 3
    else:
        # go back per codon until a listed stop codon or the limit
        while start >= 6 and not (
                dnaseq[start-3:start].upper() in REV_STOP_CODONS):
            start -= 3

    # apply the closing widening, then build DNA & protein sequences
    start = start - 3
    end = end + 3
    dnarevseq = _reversecomplement(dnaseq[start:end])
    protseq = dna2protein(dnarevseq)
    return (start,end,dnarevseq,protseq)
Esempio n. 5
0
    def __init__(self,donor,acceptor,shared_nts,gff=None):
        """
        Initialize the object from a donor and an acceptor splice site

        @type  donor: splice site object
        @param donor: donor site; checked with IsDonor

        @type  acceptor: splice site object
        @param acceptor: acceptor site; checked with IsAcceptor

        @type  shared_nts: string
        @param shared_nts: nucleotide string stored on the object; when
                           non-empty it is translated into `shared_aa`

        @type  gff: dict
        @param gff: optional values that update the gff data set up by
                    BasicGFF.__init__
        """
        # input validation
        # NOTE(review): presumably these raise on a wrong object type -- confirm
        IsDonor(donor)
        IsAcceptor(acceptor)
        # initialization
        BasicGFF.__init__(self)
        # gff defaults to None instead of a shared mutable dict; an empty
        # or missing gff leaves the data from BasicGFF.__init__ untouched
        if gff:
            self._gff.update(gff)
        self.donor      = donor
        self.acceptor   = acceptor
        # init by splice site objects
        self._init_by_splicesites()

        # forward compatibility with IntronConnectingOrfs
        self.nt_from_stop_donor = None
        self.nt_from_stop_acceptor = None

        # set shared nt/aa data
        self.shared_nts = shared_nts
        self.shared_aa  = ""
        if self.shared_nts:
            self.shared_aa = dna2prot.dna2protein(self.shared_nts)
Esempio n. 6
0
    if not exons:
        for track in input[k]['gff-gene']: print track
    for warn in input[k]['warnings']:
        print "\t", warn
    print ""

# check if the DNA exon structure equals the given protein sequence (None|True|False)
from dna2prot import dna2protein
print "# test if annotated gene model equals provided protein sequence in AbfgpGeneLocusDirectory"
# For every key: translate the concatenated exon DNA and compare it with the
# provided protein sequence; print None (no protein), True or False per key.
# NOTE(review): `input` is defined outside this view and shadows the builtin.
for k in input.keys():
    exons = input[k]['gldobj'].as_exons()
    seqp = []
    for item in exons:
        # only objects whose class name contains 'Exon' contribute sequence
        if item.__class__.__name__.find('Exon') >= 0:
            seqp.append( item.dnasequence() )
    _trans = dna2protein("".join(seqp))
    # '*' characters are stripped from the protein before comparing
    # (presumably stop codon symbols -- confirm)
    _prot  = input[k]['proteinseq'].replace('*','')
    if not _prot:
        # nothing to compare against
        print None, "\t", k
        continue
    else:
        print _trans == _prot, "\t", k
    # on a mismatch, print both sequences in chunks of 100 characters,
    # each followed by the comparison result for that chunk
    if dna2protein("".join(seqp))!=input[k]['proteinseq'].replace('*',''):
        translated = dna2protein("".join(seqp))
        for offset in range(0,len(input[k]['proteinseq']),100):
            # here '*' is stripped after slicing, so chunk offsets refer to
            # the unstripped protein sequence
            _trans = translated[offset:offset+100]
            _prot  = input[k]['proteinseq'][offset:offset+100].replace("*","")
            print "prot:", input[k]['proteinseq'][offset:offset+100]
            # trailing comma: the comparison result is printed on the same line
            print "gene:", translated[offset:offset+100],
            print _trans == _prot