def var1_to_var3(var1):
    """Rewrite a one-letter amino-acid variant in three-letter notation.

    The reference residue, the new residue and the position are extracted
    from ``var1`` with the ``variant`` helpers.  Each residue is expanded
    with ``SeqUtils.seq3``, except that a ``"*"`` residue is passed through
    unchanged.  The result is ``<ref3><pos><new3>`` as a single string.
    """
    ref_one = variant.get_refAA(var1)
    new_one = variant.get_newAA(var1)
    position = variant.get_pos(var1)

    # "*" is kept verbatim instead of being expanded by seq3.
    if ref_one == "*":
        ref_three = "*"
    else:
        ref_three = SeqUtils.seq3(ref_one)

    if new_one == "*":
        new_three = "*"
    else:
        new_three = SeqUtils.seq3(new_one)

    return ''.join((ref_three, str(position), new_three))
def get33original_seq(self, aa_code_string):
    """Normalise a string of three-letter residue codes.

    The input is read three characters at a time, optionally followed by
    ``self.separator``.  Each triplet is translated as follows:

    * a gap triplet (``3 * self.gap_char``) and ``self.unknown3`` are kept;
    * a triplet found in ``Raf.to_one_letter_code`` is looked up there and
      the result is expanded again with ``SeqUtils.seq3``; if that
      expansion is listed in ``self.blankseq3`` it becomes
      ``self.unknown3``;
    * anything else becomes ``self.unknown3``.

    Separators found in the input are copied to the output.  When
    ``self.upper == 1`` the input is upper-cased first; the result is
    always upper-cased.

    :param aa_code_string: concatenated three-letter codes.
    :return: the normalised, upper-cased code string.
    """
    if self.upper == 1:
        aa_code_string = aa_code_string.upper()

    gap3 = 3 * self.gap_char
    separator = self.separator
    len_sep = len(separator)
    total = len(aa_code_string)

    # Collect the pieces and join once instead of growing a string.
    pieces = []
    i = 0
    while i < total:
        aa_code = aa_code_string[i:i + 3]
        if aa_code == gap3:
            aa_code_original = gap3
        elif aa_code == self.unknown3:
            aa_code_original = self.unknown3
        elif aa_code in Raf.to_one_letter_code:
            # ``in`` replaces dict.has_key(), which no longer exists in
            # Python 3 and is deprecated in Python 2.
            aa_code_original = SeqUtils.seq3(Raf.to_one_letter_code[aa_code])
            if aa_code_original in self.blankseq3:
                aa_code_original = self.unknown3
        else:
            aa_code_original = self.unknown3
        pieces.append(aa_code_original)
        i += 3
        # Skip a separator that follows the triplet, echoing it to the output.
        if aa_code_string[i:i + len_sep] == separator:
            i += len_sep
            pieces.append(separator)
    return ''.join(pieces).upper()
def tbl_format(bed4_rrna, bed4_cds, bed4_trna):
    """Render rRNA, CDS and tRNA BED4 records as feature-table entries.

    Each record is unpacked as ``(reference, start, end, name)``.  All
    records are pooled and sorted, then turned into text entries:

    * tRNA: one ``tRNA`` feature whose ``product`` is ``"tRNA-"`` followed
      by ``SeqUtils.seq3(name)``;
    * rRNA: one ``rRNA`` feature whose ``product`` is the name;
    * CDS: a ``gene`` feature followed by a ``CDS`` feature carrying
      ``product`` and a hard-coded ``transl_table`` of 5.

    A feature is a ``start<TAB>end<TAB>type`` line followed by one
    ``<TAB><TAB><TAB>key<TAB>value`` line per qualifier.  The ``>refname``
    header line of the table is not produced here.

    :param bed4_rrna: list of rRNA BED4 records.
    :param bed4_cds: list of CDS BED4 records.
    :param bed4_trna: list of tRNA BED4 records.
    :return: list of entry strings, or an error message string when the
        first records of the three inputs name different references.
    """
    # Sanity check: only the first record of each list is compared.
    same_reference = bed4_rrna[0][0] == bed4_cds[0][0] == bed4_trna[0][0]
    if not same_reference:
        return "Error, annotations not from the same reference!"

    # Feature type per annotation name; later groups win on a name clash.
    kind_of = {}
    for kind, records in (("rRNA", bed4_rrna),
                          ("tRNA", bed4_trna),
                          ("CDS", bed4_cds)):
        for record in records:
            kind_of[record[3]] = kind

    one_qualifier = "{start}\t{end}\t{type}\n\t\t\t{key}\t{value}\n"
    two_qualifiers = ("{start}\t{end}\t{type}\n"
                      "\t\t\t{key1}\t{value1}\n"
                      "\t\t\t{key2}\t{value2}\n")

    entries = []
    for _chrom, start, end, anno in sorted(bed4_rrna + bed4_cds + bed4_trna):
        kind = kind_of[anno]
        if kind == "CDS":
            gene_part = one_qualifier.format(
                start=start, end=end, type="gene", key="gene", value=anno)
            cds_part = two_qualifiers.format(
                start=start, end=end, type="CDS",
                key1="product", value1=anno,
                key2="transl_table", value2=5)
            entry = gene_part + cds_part
        elif kind == "rRNA":
            entry = one_qualifier.format(
                start=start, end=end, type="rRNA", key="product", value=anno)
        else:
            # Only "tRNA" remains: kind_of never holds any other value.
            product = "tRNA-" + str(SeqUtils.seq3(anno))
            entry = one_qualifier.format(
                start=start, end=end, type="tRNA", key="product",
                value=product)
        entries.append(entry)
    return entries
def get_codes(self):
    """Return the three-letter code of every alphabet letter, in order.

    The letter ``'X'`` maps to ``self.unknown3`` as is; every other letter
    maps to the upper-cased result of ``SeqUtils.seq3``.
    """
    return [
        self.unknown3 if letter == 'X' else SeqUtils.seq3(letter).upper()
        for letter in self.alphabet.letters
    ]
def get1letter(self, aa_code):
    """Translate one three-letter code into its amino-acid letter.

    The alphabet letters are scanned in order and the first one whose
    upper-cased ``SeqUtils.seq3`` expansion equals ``aa_code`` is returned;
    ``self.unknown1`` is returned when none matches.  ``aa_code`` is
    upper-cased first when ``self.upper == 1``, and the returned letter is
    always upper-cased.  Intended as the fast single-code counterpart of
    ``get1letter_seq``.
    """
    if self.upper == 1:
        aa_code = aa_code.upper()
    for candidate in self.alphabet.letters:
        if SeqUtils.seq3(candidate).upper() == aa_code:
            return candidate.upper()
    return self.unknown1.upper()
def get3letter(self, aa):
    """Translate one amino-acid letter into its three-letter code.

    Input longer than one character yields ``self.unknown3`` unchanged.
    Otherwise the letter (upper-cased first when ``self.upper == 1``) is
    expanded with ``SeqUtils.seq3``; an expansion listed in
    ``self.blankseq3`` is replaced by ``self.unknown3``.  That result is
    returned upper-cased.  Intended as the fast single-letter counterpart
    of ``get3letter_seq``.
    """
    if len(aa) > 1:
        return self.unknown3
    letter = aa.upper() if self.upper == 1 else aa
    three = SeqUtils.seq3(letter)
    if three in self.blankseq3:
        three = self.unknown3
    return three.upper()
def get33original(self, aa_code):
    """Map one three-letter residue code onto its normalised code.

    ``aa_code`` (upper-cased first when ``self.upper == 1``) is looked up
    in ``Raf.to_one_letter_code`` and the value found is expanded again
    with ``SeqUtils.seq3``.

    :param aa_code: three-letter residue code.
    :return: the upper-cased expansion, or ``self.unknown3`` when the code
        is not in the table or its expansion is listed in
        ``self.blankseq3``.
    """
    if self.upper == 1:
        aa_code = aa_code.upper()
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3
    # and is deprecated in Python 2.
    if aa_code not in Raf.to_one_letter_code:
        return self.unknown3
    aa_code_original = SeqUtils.seq3(Raf.to_one_letter_code[aa_code])
    if aa_code_original in self.blankseq3:
        return self.unknown3
    return aa_code_original.upper()
def get3letter_seq(self, aa_string):
    """Translate a one-letter amino-acid string into three-letter codes.

    Each character becomes one triplet: ``self.gap_char`` becomes three gap
    characters, any other character is expanded with ``SeqUtils.seq3`` and
    replaced by ``self.unknown3`` when the expansion is listed in
    ``self.blankseq3``.  Triplets are joined with ``self.separator``.  The
    input is upper-cased first when ``self.upper == 1``; the whole result
    is always upper-cased.
    """
    if self.upper == 1:
        aa_string = aa_string.upper()
    triplets = []
    for letter in aa_string:
        if letter == self.gap_char:
            triplets.append(3 * self.gap_char)
            continue
        triplet = SeqUtils.seq3(letter)
        if triplet in self.blankseq3:
            triplet = self.unknown3
        triplets.append(triplet)
    return self.separator.join(triplets).upper()
def get1letter_seq(self, aa_code_string):
    """Translate a three-letter code string into a one-letter sequence.

    The input is read three characters at a time, optionally followed by
    ``self.separator`` (which is skipped).  A gap triplet
    (``3 * self.gap_char``) becomes ``self.gap_char``.  Any other triplet
    (upper-cased first when ``self.upper == 1``) becomes the first alphabet
    letter whose upper-cased ``SeqUtils.seq3`` expansion equals it, or
    ``self.unknown1`` when there is none.

    :param aa_code_string: concatenated three-letter codes.
    :return: the upper-cased one-letter sequence.
    """
    gap3 = 3 * self.gap_char
    separator = self.separator
    len_sep = len(separator)
    total = len(aa_code_string)

    # Three-letter -> one-letter table, built once on first use so the
    # alphabet is not re-expanded with seq3 for every residue.
    three_to_one = None
    letters = []
    i = 0
    while i < total:
        aa_code = aa_code_string[i:i + 3]
        if aa_code == gap3:
            aa = self.gap_char
        else:
            if self.upper == 1:
                aa_code = aa_code.upper()
            if three_to_one is None:
                three_to_one = {}
                for aa_letter in self.alphabet.letters:
                    # setdefault keeps the first letter for a given code,
                    # matching a first-match scan of the alphabet.
                    three_to_one.setdefault(
                        SeqUtils.seq3(aa_letter).upper(), aa_letter)
            aa = three_to_one.get(aa_code, self.unknown1)
        letters.append(aa)
        i += 3
        # Skip a separator that follows the triplet.
        if aa_code_string[i:i + len_sep] == separator:
            i += len_sep
    return ''.join(letters).upper()
print 'sequence feasible', bsequence[ alternative_early_anticodon_position: alternative_early_anticodon_position + 5] if f_struct[alternative_early_anticodon_position] <> '(': if verbose: print 'structure feasible', f_struct[ alternative_early_anticodon_position: alternative_early_anticodon_position + 5] anticodon = bsequence[ alternative_early_anticodon_position + 1:alternative_early_anticodon_position + 4] if verbose: print anticodon, 'passed sructural constrains' aminoacid = Seq(anticodon).reverse_complement().translate( table=int(ttable_id)) if verbose: print aminoacid, 'specificity' gname = SeqUtils.seq3(aminoacid) label = str(aminoacid) if gname in ['Ser', 'Arg', 'Gly' ] and anticodon[1:3] == 'ct': label = label + '2' if ttable_id == '4' and gname == 'Arg' and anticodon[ 1:3] == 'cg': label = label + '1' if ttable_id == '13' and gname == 'Gly' and anticodon[ 1:3] == 'cc': label = label + '1' if gname == 'Ser' and anticodon[1:3] == 'ga': label = label + '1' if gname == 'Leu' and anticodon[2] == 'g': label = label + '2' if gname == 'Leu' and anticodon[2] == 'a':
number_of_sequences = pd.DataFrame( [[sys.argv[1], len(sequences_a)], [sys.argv[2], len(sequences_b)]], columns=["Filename", "Number of Sequences"]) ax = sns.barplot(x="Filename", y="Number of Sequences", data=number_of_sequences) plt.title("Number of Sequences") plt.savefig("plotNumberOfSequences.pdf") plt.clf() # plt.show() # Box plot showing the number of each amino acid per protein for aa in amino_acids: aa_three_letter = SeqUtils.seq3(aa) amino_acids_count = [] for protein in sequences_a: amino_acids_count.append([sys.argv[1], str(protein.seq).count(aa)]) for protein in sequences_b: amino_acids_count.append([sys.argv[2], str(protein.seq).count(aa)]) amino_acids_count = pd.DataFrame( amino_acids_count, columns=["Filename", "Number of " + aa_three_letter + " residues"]) ax = sns.boxplot(x="Filename", y="Number of " + aa_three_letter + " residues",