def var1_to_var3(var1):
    """Build the three-letter form of a variant given in one-letter form.

    The reference residue, the new residue and the position are read from
    ``var1`` through the ``variant`` helpers.  Each residue is passed through
    ``SeqUtils.seq3`` unless it is the ``"*"`` symbol, which is kept as-is.

    :param var1: variant description accepted by ``variant.get_refAA``,
        ``variant.get_newAA`` and ``variant.get_pos``.
    :return: reference code, position and new code concatenated into one
        string.
    """
    ref_one = variant.get_refAA(var1)
    new_one = variant.get_newAA(var1)
    position = variant.get_pos(var1)

    # "*" has no three-letter spelling here, so it passes through unchanged.
    ref_three = SeqUtils.seq3(ref_one) if ref_one != "*" else "*"
    new_three = SeqUtils.seq3(new_one) if new_one != "*" else "*"

    return ''.join((ref_three, str(position), new_three))
Esempio n. 2
0
 def get33original_seq(self, aa_code_string):
     """Normalise a string of three-letter residue codes, chunk by chunk.

     The input is read three characters at a time.  After each chunk, an
     occurrence of ``self.separator`` is skipped in the input and copied to
     the output.  Each chunk is translated as follows:

     * three gap characters stay three gap characters;
     * ``self.unknown3`` stays ``self.unknown3``;
     * a code present in ``Raf.to_one_letter_code`` is mapped to its
       one-letter value and then back through ``SeqUtils.seq3``; if that
       result is listed in ``self.blankseq3`` it becomes ``self.unknown3``;
     * anything else becomes ``self.unknown3``.

     :param aa_code_string: string of three-character codes, optionally
         delimited by ``self.separator``.  It is upper-cased first when
         ``self.upper == 1``.
     :return: the translated string, upper-cased.
     """
     if self.upper == 1:
         aa_code_string = aa_code_string.upper()

     gap3 = 3 * self.gap_char
     separator = self.separator
     len_sep = len(separator)

     # Collect the pieces and join once instead of repeated string +=.
     pieces = []
     i = 0
     while i < len(aa_code_string):
         aa_code = aa_code_string[i:i + 3]
         if aa_code == gap3:
             aa_code_original = gap3
         elif aa_code == self.unknown3:
             aa_code_original = self.unknown3
         elif aa_code in Raf.to_one_letter_code:
             # ``in`` replaces dict.has_key(), which no longer exists in
             # Python 3; the membership test is the same on Python 2.
             aa_code_original = SeqUtils.seq3(
                 Raf.to_one_letter_code[aa_code])
             if aa_code_original in self.blankseq3:
                 aa_code_original = self.unknown3
         else:
             aa_code_original = self.unknown3
         pieces.append(aa_code_original)
         i += 3
         # A separator directly after the chunk is consumed and echoed.
         if aa_code_string[i:i + len_sep] == separator:
             i += len_sep
             pieces.append(separator)
     return ''.join(pieces).upper()
Esempio n. 3
0
def tbl_format(bed4_rrna, bed4_cds, bed4_trna):
    """
    Turn BED4-style annotation records into tbl-format text entries.

    Every record is a 4-item sequence (reference, start, end, name).  The
    three inputs are merged, sorted, and each record is rendered as a
    tab-separated "start end type" line followed by indented qualifier
    lines:

    * rRNA: one entry with a "product" qualifier holding the name;
    * tRNA: one entry whose "product" is "tRNA-" plus the name passed
      through SeqUtils.seq3;
    * CDS: a "gene" entry followed by a "CDS" entry carrying "product" and
      "transl_table" (fixed to 5) qualifiers, returned as a single string.

    :param bed4_rrna: list of rRNA records.
    :param bed4_cds: list of CDS records.
    :param bed4_trna: list of tRNA records.
    :return: list of entry strings in sorted record order, or an error
        message string when the first records of the three inputs do not
        share the same reference name.
    """
    # Sanity check: only the first record of each input is compared.
    if not (bed4_rrna[0][0] == bed4_cds[0][0] == bed4_trna[0][0]):
        return "Error, annotations not from the same reference!"

    # Annotation name -> feature type.  Filled in rRNA, tRNA, CDS order, so a
    # name shared between inputs ends up with the last type assigned.
    feature_type = {}
    labelled_inputs = (
        (bed4_rrna, "rRNA"),
        (bed4_trna, "tRNA"),
        (bed4_cds, "CDS"),
    )
    for records, label in labelled_inputs:
        for record in records:
            feature_type[record[3]] = label

    one_qualifier = "{start}\t{end}\t{type}\n\t\t\t{key}\t{value}\n"
    two_qualifiers = "{start}\t{end}\t{type}\n\t\t\t{key1}\t{value1}\n\t\t\t{key2}\t{value2}\n"

    entries = []
    for chro, start, end, anno in sorted(bed4_rrna + bed4_cds + bed4_trna):
        kind = feature_type[anno]
        if kind == "rRNA":
            entry = one_qualifier.format(
                start=start, end=end, type="rRNA", key="product", value=anno)
        elif kind == "tRNA":
            product = "tRNA-" + str(SeqUtils.seq3(anno))
            entry = one_qualifier.format(
                start=start, end=end, type="tRNA", key="product",
                value=product)
        else:
            # Only "CDS" remains: emit the gene entry, then the CDS entry.
            gene_part = one_qualifier.format(
                start=start, end=end, type="gene", key="gene", value=anno)
            cds_part = two_qualifiers.format(
                start=start, end=end, type="CDS",
                key1="product", value1=anno,
                key2="transl_table", value2=5)
            entry = gene_part + cds_part
        entries.append(entry)

    return entries
Esempio n. 4
0
 def get_codes(self):
     """Return the three-letter codes for ``self.alphabet.letters``, in order.

     The letter 'X' maps to ``self.unknown3``; every other letter maps to
     the upper-cased result of ``SeqUtils.seq3``.
     """
     return [
         self.unknown3 if letter == 'X' else SeqUtils.seq3(letter).upper()
         for letter in self.alphabet.letters
     ]
Esempio n. 5
0
 def get1letter(self, aa_code):
     """Translate one three-letter code into its single amino-acid letter.

     The letters of ``self.alphabet`` are scanned in order and the first one
     whose upper-cased ``SeqUtils.seq3`` form equals ``aa_code`` is used;
     ``self.unknown1`` is used when none matches.  ``aa_code`` is upper-cased
     before the comparison when ``self.upper == 1``.

     :return: the matching letter (or the unknown letter), upper-cased.
     """
     wanted = aa_code.upper() if self.upper == 1 else aa_code
     fallback = self.unknown1
     found = next(
         (letter for letter in self.alphabet.letters
          if SeqUtils.seq3(letter).upper() == wanted),
         fallback)
     return found.upper()
Esempio n. 6
0
 def get3letter(self, aa):
     """Translate a single amino-acid letter into its three-letter code.

     Input longer than one character yields ``self.unknown3`` exactly as
     stored.  Otherwise the letter (upper-cased first when
     ``self.upper == 1``) goes through ``SeqUtils.seq3``; a result listed in
     ``self.blankseq3`` is replaced by ``self.unknown3``.  The code returned
     on this path is upper-cased.
     """
     if len(aa) > 1:
         return self.unknown3
     letter = aa.upper() if self.upper == 1 else aa
     three = SeqUtils.seq3(letter)
     chosen = self.unknown3 if three in self.blankseq3 else three
     return chosen.upper()
Esempio n. 7
0
 def get33original(self, aa_code):
     """Normalise a single three-letter residue code.

     The code (upper-cased first when ``self.upper == 1``) is looked up in
     ``Raf.to_one_letter_code`` and the one-letter value is converted back
     with ``SeqUtils.seq3``.

     :param aa_code: three-letter residue code to normalise.
     :return: the upper-cased ``SeqUtils.seq3`` result, or ``self.unknown3``
         when the code is not in the table or the result is listed in
         ``self.blankseq3``.
     """
     if self.upper == 1:
         aa_code = aa_code.upper()
     # ``in`` replaces dict.has_key(), which no longer exists in Python 3;
     # the membership test is the same on Python 2.
     if aa_code not in Raf.to_one_letter_code:
         return self.unknown3
     aa_code_original = SeqUtils.seq3(Raf.to_one_letter_code[aa_code])
     if aa_code_original in self.blankseq3:
         return self.unknown3
     return aa_code_original.upper()
Esempio n. 8
0
 def get3letter_seq(self, aa_string):
     """Translate a one-letter amino-acid string into three-letter codes.

     Each gap character becomes three gap characters.  Every other character
     goes through ``SeqUtils.seq3``, and a result listed in
     ``self.blankseq3`` is replaced by ``self.unknown3``.  The codes are
     delimited by ``self.separator`` with no trailing separator.

     :param aa_string: one-letter sequence; upper-cased first when
         ``self.upper == 1``.
     :return: the joined three-letter string, upper-cased.
     """
     if self.upper == 1:
         aa_string = aa_string.upper()

     triplets = []
     for residue in aa_string:
         if residue == self.gap_char:
             triplets.append(3 * self.gap_char)
             continue
         three = SeqUtils.seq3(residue)
         triplets.append(self.unknown3 if three in self.blankseq3 else three)

     return self.separator.join(triplets).upper()
Esempio n. 9
0
 def get1letter_seq(self, aa_code_string):
     """Translate a string of three-letter codes into a one-letter sequence.

     The input is read three characters at a time; an occurrence of
     ``self.separator`` directly after a chunk is skipped.  Three gap
     characters give one gap character.  Any other chunk (upper-cased first
     when ``self.upper == 1``) is matched against the upper-cased
     ``SeqUtils.seq3`` form of each letter in ``self.alphabet.letters``; the
     first match is used, and ``self.unknown1`` is used when none matches.

     :return: the one-letter sequence, upper-cased.
     """
     gap3 = 3 * self.gap_char
     sep_len = len(self.separator)
     total = len(aa_code_string)

     letters = []
     pos = 0
     while pos < total:
         triplet = aa_code_string[pos:pos + 3]
         if triplet == gap3:
             letters.append(self.gap_char)
         else:
             if self.upper == 1:
                 triplet = triplet.upper()
             letters.append(next(
                 (candidate for candidate in self.alphabet.letters
                  if SeqUtils.seq3(candidate).upper() == triplet),
                 self.unknown1))
         pos += 3
         # Step over a separator that directly follows the chunk.
         if aa_code_string[pos:pos + sep_len] == self.separator:
             pos += sep_len

     return ''.join(letters).upper()
Esempio n. 10
0
     print 'sequence feasible', bsequence[
         alternative_early_anticodon_position:
         alternative_early_anticodon_position + 5]
 if f_struct[alternative_early_anticodon_position] <> '(':
     if verbose:
         print 'structure feasible', f_struct[
             alternative_early_anticodon_position:
             alternative_early_anticodon_position + 5]
     anticodon = bsequence[
         alternative_early_anticodon_position +
         1:alternative_early_anticodon_position + 4]
     if verbose: print anticodon, 'passed sructural constrains'
     aminoacid = Seq(anticodon).reverse_complement().translate(
         table=int(ttable_id))
     if verbose: print aminoacid, 'specificity'
     gname = SeqUtils.seq3(aminoacid)
     label = str(aminoacid)
     if gname in ['Ser', 'Arg', 'Gly'
                  ] and anticodon[1:3] == 'ct':
         label = label + '2'
     if ttable_id == '4' and gname == 'Arg' and anticodon[
             1:3] == 'cg':
         label = label + '1'
     if ttable_id == '13' and gname == 'Gly' and anticodon[
             1:3] == 'cc':
         label = label + '1'
     if gname == 'Ser' and anticodon[1:3] == 'ga':
         label = label + '1'
     if gname == 'Leu' and anticodon[2] == 'g':
         label = label + '2'
     if gname == 'Leu' and anticodon[2] == 'a':
# Tabulate, one row per input file, how many entries each sequence
# collection holds (sequences_a / sequences_b are built outside this view).
number_of_sequences = pd.DataFrame(
    data=[
        [sys.argv[1], len(sequences_a)],
        [sys.argv[2], len(sequences_b)],
    ],
    columns=["Filename", "Number of Sequences"],
)

# Bar chart of the per-file counts, saved as a PDF.
ax = sns.barplot(
    data=number_of_sequences,
    x="Filename",
    y="Number of Sequences",
)
plt.title("Number of Sequences")
plt.savefig("plotNumberOfSequences.pdf")
# Clear the current figure so the plots drawn next start from a blank one.
plt.clf()
# plt.show()

# Box plot showing the number of each amino acid per protein
for aa in amino_acids:
    aa_three_letter = SeqUtils.seq3(aa)

    amino_acids_count = []

    for protein in sequences_a:
        amino_acids_count.append([sys.argv[1], str(protein.seq).count(aa)])

    for protein in sequences_b:
        amino_acids_count.append([sys.argv[2], str(protein.seq).count(aa)])

    amino_acids_count = pd.DataFrame(
        amino_acids_count,
        columns=["Filename", "Number of " + aa_three_letter + " residues"])

    ax = sns.boxplot(x="Filename",
                     y="Number of " + aa_three_letter + " residues",