예제 #1
0
def _find_qp_or_pq_match_on_orfobj(exon,
                                   orfObj,
                                   min_identity_count=1,
                                   max_dissimilar_count=0):
    """ """
    tinyexonmatches = []
    protseq = exon.proteinsequence()
    protlen = len(protseq)
    for offset in range(0, orfObj.protein_length - protlen):
        seqpart = orfObj.protein_sequence[offset:offset + protlen]
        match = make_alignment_match(protseq,
                                     seqpart,
                                     matrix=TINYEXON_MATRIX.matrix)
        if match.count(" ") <= max_dissimilar_count and\
        match.count("*") >= min_identity_count:
            aapos = orfObj.protein_startPY + offset
            dnapos = orfObj.aapos2dnapos(aapos)
            if exon.acceptor.phase == 2: dnapos -= 1
            if exon.acceptor.phase == 1: dnapos -= 2
            dnaseq = orfObj.inputgenomicsequence[dnapos - 2:dnapos +
                                                 exon.length + 2].upper()
            if dnaseq[0:2] == 'AG' or dnaseq[-2:] in ['GT', 'GC']:
                tinyexonmatches.append((seqpart, aapos))

    # return list of tinyexon match tuples
    return tinyexonmatches
예제 #2
0
def _are_tinyexons_similar(exonQ,exonS,min_identity_count=1,max_dissimilar_count=0):
    """ """
    if exonS.length != exonQ.length: return False
    if exonQ.donor.phase != exonS.donor.phase: return False
    if exonQ.proteinsequence() == exonS.proteinsequence(): return True
    # if here: test similarity
    match = make_alignment_match(
            exonQ.proteinsequence(),
            exonS.proteinsequence(),
            matrix=TINYEXON_MATRIX.matrix )
    if match.count(" ") <= max_dissimilar_count and\
    match.count("*") >= min_identity_count:
        return True
    else:
        return False
예제 #3
0
def _find_match_on_orfobj(exon,
                          orfObj,
                          min_identity_count=1,
                          max_dissimilar_count=0):
    """ """
    tinyexonmatches = []
    protseq = exon.proteinsequence()
    protlen = len(protseq)
    for offset in range(0, orfObj.protein_length - protlen):
        seqpart = orfObj.protein_sequence[offset:offset + protlen]
        match = make_alignment_match(protseq,
                                     seqpart,
                                     matrix=TINYEXON_MATRIX.matrix)

        if protseq == "SGWNAA" and seqpart in [
                'SGFNSA', 'SGWNAA', 'GLFNSV', 'SGFTSA', 'GGFTSA', 'GDFNAV',
                'GKFNTI', 'SGFNSA', 'GNFTTI', 'GGGSTN', 'GDFSAV', 'GKFNTI',
                'GAFTSA'
        ]:
            maxQ = TINYEXON_MATRIX.scorealignment(protseq, protseq)
            maxS = TINYEXON_MATRIX.scorealignment(seqpart, seqpart)
            print True, protseq, "'%s'" % match, seqpart, (
                TINYEXON_MATRIX.scorealignment(protseq,
                                               seqpart), maxQ, maxS), orfObj

        #if protseq == "LSPSM":
        #    maxQ = TINYEXON_MATRIX.scorealignment(protseq,protseq)
        #    maxS = TINYEXON_MATRIX.scorealignment(seqpart,seqpart)
        #    print False, protseq, "'%s'" % match, seqpart, (TINYEXON_MATRIX.scorealignment(protseq,seqpart),maxQ,maxS), orfObj


        if match.count(" ") <= max_dissimilar_count and\
        match.count("*") >= min_identity_count:
            aapos = orfObj.protein_startPY + offset
            tinyexonmatches.append((seqpart, aapos))

        elif len(seqpart) >= 5 and match.count("*") >= min_identity_count and\
        TINYEXON_MATRIX.scorealignment(protseq,seqpart) > 0 and\
        match.count(" ") <= max_dissimilar_count+1:
            # escape for longer tinyexons; relax constrain a little bit
            aapos = orfObj.protein_startPY + offset
            tinyexonmatches.append((seqpart, aapos))

        else:
            pass

    # return list of tinyexon match tuples
    return tinyexonmatches
예제 #4
0
def _are_tinyexons_similar(exonQ,
                           exonS,
                           min_identity_count=1,
                           max_dissimilar_count=0):
    """ """
    if exonS.length != exonQ.length: return False
    if exonQ.donor.phase != exonS.donor.phase: return False
    if exonQ.proteinsequence() == exonS.proteinsequence(): return True
    # if here: test similarity
    match = make_alignment_match(exonQ.proteinsequence(),
                                 exonS.proteinsequence(),
                                 matrix=TINYEXON_MATRIX.matrix)
    if match.count(" ") <= max_dissimilar_count and\
    match.count("*") >= min_identity_count:
        return True
    else:
        return False
예제 #5
0
def _find_qp_or_pq_match_on_orfobj(exon,orfObj,min_identity_count=1,max_dissimilar_count=0):
    """ """
    tinyexonmatches = []
    protseq = exon.proteinsequence()
    protlen = len(protseq)
    for offset in range(0,orfObj.protein_length-protlen):
        seqpart = orfObj.protein_sequence[offset:offset+protlen]
        match = make_alignment_match(protseq,seqpart,matrix=TINYEXON_MATRIX.matrix)
        if match.count(" ") <= max_dissimilar_count and\
        match.count("*") >= min_identity_count:
            aapos  = orfObj.protein_startPY + offset
            dnapos = orfObj.aapos2dnapos(aapos)
            if exon.acceptor.phase == 2: dnapos-=1
            if exon.acceptor.phase == 1: dnapos-=2
            dnaseq = orfObj.inputgenomicsequence[dnapos-2:dnapos+exon.length+2].upper()
            if dnaseq[0:2] == 'AG' or dnaseq[-2:] in ['GT','GC']:
                tinyexonmatches.append( (seqpart,aapos) )

    # return list of tinyexon match tuples
    return tinyexonmatches
예제 #6
0
def _find_match_on_orfobj(exon,orfObj,min_identity_count=1,max_dissimilar_count=0):
    """ """
    tinyexonmatches = []
    protseq = exon.proteinsequence()
    protlen = len(protseq)
    for offset in range(0,orfObj.protein_length-protlen):
        seqpart = orfObj.protein_sequence[offset:offset+protlen]
        match = make_alignment_match(protseq,seqpart,matrix=TINYEXON_MATRIX.matrix)

        if protseq == "SGWNAA" and seqpart in ['SGFNSA','SGWNAA','GLFNSV','SGFTSA','GGFTSA','GDFNAV','GKFNTI','SGFNSA','GNFTTI','GGGSTN','GDFSAV','GKFNTI','GAFTSA']:
            maxQ = TINYEXON_MATRIX.scorealignment(protseq,protseq)
            maxS = TINYEXON_MATRIX.scorealignment(seqpart,seqpart)
            print True, protseq, "'%s'" % match, seqpart, (TINYEXON_MATRIX.scorealignment(protseq,seqpart),maxQ,maxS), orfObj

        #if protseq == "LSPSM":
        #    maxQ = TINYEXON_MATRIX.scorealignment(protseq,protseq)
        #    maxS = TINYEXON_MATRIX.scorealignment(seqpart,seqpart)
        #    print False, protseq, "'%s'" % match, seqpart, (TINYEXON_MATRIX.scorealignment(protseq,seqpart),maxQ,maxS), orfObj


        if match.count(" ") <= max_dissimilar_count and\
        match.count("*") >= min_identity_count:
            aapos  = orfObj.protein_startPY + offset
            tinyexonmatches.append( (seqpart,aapos) )

        elif len(seqpart) >= 5 and match.count("*") >= min_identity_count and\
        TINYEXON_MATRIX.scorealignment(protseq,seqpart) > 0 and\
        match.count(" ") <= max_dissimilar_count+1:
            # escape for longer tinyexons; relax constrain a little bit
            aapos  = orfObj.protein_startPY + offset
            tinyexonmatches.append( (seqpart,aapos) )

        else:
            pass


    # return list of tinyexon match tuples
    return tinyexonmatches