Ejemplo n.º 1
0
def Translation(seq):
    '''Translate a DNA sequence into a protein string, codon by codon.

    The sequence is read in frame from position 0; trailing bases that do
    not fill a complete codon are ignored.  Each codon is passed to
    Codon2AA2 and its result is mapped as follows:

      * 'J' (stop codon)     -> '*'
      * 'Z' (IUPAC codon)    -> the codon is expanded with IUPAC_3mer and
                                every expansion is translated:
                                  - exactly one distinct result -> that result
                                  - several distinct results    -> 'X' (any aa)
                                  - no expansion at all         -> '*'
      * anything else        -> used unchanged

    Returns the concatenated one-letter codes ('' when seq holds fewer than
    three bases).
    '''
    # Floor division: on Python 3 a plain "/" yields a float, which range()
    # rejects with a TypeError.  "//" behaves the same on Python 2 and 3.
    codon_count = len(seq) // 3

    residues = []
    for i in range(codon_count):
        codon = seq[i * 3:(i + 1) * 3]
        aa = Codon2AA2(codon)  # translate once and reuse the result

        if aa == 'J':  ## stop codon use *
            aa = '*'
        elif aa == 'Z':  ## IUPAC code
            candidates = {Codon2AA2(kmer) for kmer in IUPAC_3mer(codon)}
            if len(candidates) > 1:
                aa = 'X'  ## X represents any aa
            elif len(candidates) == 1:
                aa = next(iter(candidates))
            else:
                # IUPAC_3mer returned nothing to translate
                aa = '*'

        residues.append(aa)

    return ''.join(residues)
Ejemplo n.º 2
0
def IUPAC_3mer(seq):
    '''Expand a 3-letter codon into the list of 3mers it can stand for.

    Each of the first three characters of seq is looked up in _IUPAC and
    every combination of the resulting bases is generated, first position
    varying slowest.  Combinations that Codon2AA2 maps to "J" are left out
    of the returned list.
    '''
    return [
        base1 + base2 + base3
        for base1 in _IUPAC[seq[0]]
        for base2 in _IUPAC[seq[1]]
        for base3 in _IUPAC[seq[2]]
        if Codon2AA2(base1 + base2 + base3) != "J"
    ]
Ejemplo n.º 3
0
def GetEDP(seq, transcript_len):
    '''Return the entropy density profile (EDP) of an ORF as a string.

    seq is read in frame from position 0 and every complete codon is passed
    to Codon2AA2.  Codons translated to "J" are skipped; codons translated
    to "Z" (IUPAC codons) are expanded with IUPAC_3mer and one count is
    split evenly across the translations of the expansions; every other
    codon adds one count to its translation.  The counts (seeded with a
    1e-9 pseudo-count per entry of _AA_list) are turned into frequencies p,
    each contributes -p * log2(p), and each contribution is divided by
    their sum H.  Shares below 1e-7 are reported as 0.

    Parameters:
        seq            -- ORF nucleotide sequence.
        transcript_len -- accepted for interface compatibility; it is not
                          used by this function.

    Returns:
        The shares joined by tabs, in the iteration order of the dict built
        from _AA_list, or None when seq has three characters or fewer.
    '''
    # Sequences of at most one codon yield no profile (the original fell
    # through and returned None implicitly).
    if len(seq) <= 3:
        return None

    # entropy density
    counts = {}
    for aa in _AA_list:
        counts[aa] = 1e-9

    # NOTE(review): the 20 presumably matches len(_AA_list) - confirm.
    total = 1e-9 * 20

    # Floor division: on Python 3 a plain "/" yields a float, which range()
    # rejects with a TypeError.  "//" behaves the same on Python 2 and 3.
    for i in range(len(seq) // 3):
        codon = seq[i * 3:(i + 1) * 3]
        aa = Codon2AA2(codon)  # translate once and reuse the result
        if aa == "J":
            continue
        if aa == "Z":
            # consider the IUPAC codon: spread one count over its expansions
            kmers = IUPAC_3mer(codon)
            for kmer in kmers:
                counts[Codon2AA2(kmer)] += 1.0 / len(kmers)
        else:
            counts[aa] += 1.0
        total += 1.0

    # Per-entry entropy contribution and their sum H.
    contributions = {}
    H = 0.0
    for aa, count in counts.items():
        freq = count / total
        term = -freq * np.log2(freq)
        contributions[aa] = term
        H += term

    # Normalise by H and clamp negligible shares to 0.
    fields = []
    for aa, term in contributions.items():
        share = term / H
        if share < 1e-7:
            share = 0
        fields.append(str(share))

    return "\t".join(fields)
Ejemplo n.º 4
0
def SixMer2AA(seq):
    '''Translate a 6mer as two consecutive codons.

    Characters 0-2 and 3-5 of seq are each passed to Codon2AA2 and the two
    results are concatenated in that order.
    '''
    first_aa = Codon2AA2(seq[0:3])
    second_aa = Codon2AA2(seq[3:6])
    return first_aa + second_aa