def indentifyMutationInformation(record, reference_sequence_informations,
                                 max_mutations=5):
    """Build the result row describing how ``record`` differs from the reference.

    The returned list always has eight entries::

        [id, organism, type, reference pattern,
         record pattern, gene, mutations, position]

    Args:
        record: Mapping with the keys ``"id"``, ``"type"``, ``"pattern"``
            and ``"position"``.
        reference_sequence_informations: Mapping with the keys
            ``"organisme"``, ``"pattern"``, ``"gene"``,
            ``"position_gene_sequence"``, ``"nucleotide_sequence"`` and
            ``"amino_acid_sequence"``.
        max_mutations: Largest number of mutations (inclusive) for which the
            variation is still reported; above it the row is marked
            "Not identified". Defaults to 5, the previously hard-coded value.

    Returns:
        list: The eight-entry row. The last four entries are
        ``[None, gene, None, position]`` when the record carries the
        reference pattern, ``[pattern, gene, mutations, position]`` when a
        variation is reported, and four ``"Not identified"`` strings
        otherwise.
    """
    reference = reference_sequence_informations

    # The first four columns are the same for every kind of row.
    common = [
        record["id"],
        reference["organisme"],
        record["type"],
        reference["pattern"],
    ]
    not_identified = common + ["Not identified"] * 4

    # Same pattern as the reference sequence: nothing mutated.
    if record["pattern"] == reference["pattern"]:
        return common + [None, reference["gene"], None, record["position"]]

    # No pattern variation could be determined for this record.
    if record["pattern"] is None:
        return not_identified

    # Position of the initial pattern inside the gene sequence. The end
    # offset uses the raw pattern length, i.e. including any "-" characters.
    start = reference["position_gene_sequence"]
    end = start + len(record["pattern"])

    # Generate the mutated sequence. Plain ``+`` (rather than ``str.join``)
    # is kept on purpose so the expression keeps working if the reference
    # sequence is a sequence object instead of a str -- TODO confirm type.
    mutated_sequence = (
        "" + reference["nucleotide_sequence"][0:start]
        + record["pattern"].replace("-", "")
        + reference["nucleotide_sequence"][end:]
    )
    rna_sequence = transcribe(mutated_sequence)
    aa_sequence = str(rna_sequence.translate())

    # Compare the translated mutant with the reference protein.
    mutations = getMutation(reference["amino_acid_sequence"], aa_sequence)
    if len(mutations) > max_mutations:
        return not_identified

    return common + [
        record["pattern"],
        reference["gene"],
        mutations,
        record["position"],
    ]
def translator(path_to_fasta, codons_table):
    """Yield the translated sequence of every record in a FASTA file.

    Each record's ``seq`` is passed through ``transcribe`` and the result
    through ``translate`` together with ``codons_table``.

    The file is opened in a ``with`` block so that it is closed even when
    the caller stops iterating early or an exception is raised while a
    record is being processed; previously the handle was only closed once
    the generator had been fully exhausted.

    Args:
        path_to_fasta: Path of the FASTA file to read.
        codons_table: Codon table argument forwarded to ``translate``.

    Yields:
        The value returned by ``translate`` for each record, in file order.
    """
    with open(path_to_fasta, 'r') as fasta_file:
        for seq_record in parse(fasta_file, 'fasta'):
            rna_seq = transcribe(seq_record.seq)
            yield translate(rna_seq, codons_table)
def directStringSeq():
    """Print a fixed DNA string next to the result of each string helper.

    All conversions are computed first; the labelled values are printed
    afterwards, one per line.
    """
    dna = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG"

    # (label, value) pairs in the order they are reported.
    report = [
        ('my_string = ', dna),
        ('Compl = ', complement(dna)),
        ('reCompl = ', reverse_complement(dna)),
        ('transc = ', transcribe(dna)),
        ('bTransc = ', back_transcribe(dna)),
        ('transl = ', translate(dna)),
    ]
    # Move "my_string" to the end, matching the original report order.
    report.append(report.pop(0))
    report.insert(0, report.pop())

    for label, value in report:
        print(label, value)
def TransPageView(request):
    """Render the transcription page and, on POST, the transcribed sequence.

    Non-POST requests get the empty form. On POST the submitted
    ``sequence`` field is passed to ``validateseq``; an ``"error"`` result
    re-renders the form with an info message, otherwise the first element
    of the result is transcribed and both sequences are handed to the
    template.
    """
    template = "transcription.html"

    if request.method != "POST":
        return render(request, template)

    validated = validateseq(request.POST.get('sequence'))
    if validated == "error":
        messages.info(request, 'Invalid DNA sequence')
        return render(request, template)

    rna = transcribe(validated[0])
    context = {
        'seq': validated[0],
        'transseq': rna,
    }
    return render(request, template, context)
def TranslatePageView(request):
    """Render the six-frame translation of a submitted RNA or DNA sequence.

    Non-POST requests get the input form. On POST the ``sequence`` field is
    first checked with ``validaterna``; if that reports ``"error"`` it is
    checked with ``validateseq`` and, when accepted, its first element is
    transcribed. If both checks report ``"error"`` the form is shown again
    with an info message. Otherwise elements 1 to 8 of the
    ``six_frame_translations`` result are passed to the result template.
    """
    if request.method != 'POST':
        return render(request, "translate_base.html")

    xseq = request.POST.get('sequence')
    xoption = request.POST.get('gencode')

    rna = validaterna(xseq)
    if rna == "error":
        # Not valid RNA: fall back to treating the input as DNA.
        dna = validateseq(xseq)
        if dna == "error":
            messages.info(request, 'Invalid Sequence')
            return render(request, "translate_base.html")
        rna = transcribe(dna[0])

    frames = six_frame_translations(rna, xoption)
    context = {
        'trans_one': frames[1],
        'trans_two': frames[2],
        'trans_three': frames[3],
        'my_seq': frames[4],
        'my_seq_comp': frames[5],
        'comp_one': frames[6],
        'comp_two': frames[7],
        'comp_three': frames[8],
    }
    return render(request, "translate_result.html", context)
def apply_operation():
    """Apply the chosen conversion to the input text and display the result.

    Reads the selected codon table, the whitespace-stripped input sequence
    and the selected operation from the module-level widgets, logs each of
    them to stdout, then replaces the content of ``output_text`` with the
    result. An unrecognised operation yields an empty result.
    """
    codon_table = codon_list.get(codon_list.curselection())
    print(f"Code: {codon_table}")

    # split()/join removes every kind of whitespace, including newlines.
    seq = "".join(input_text.get(1.0, tk.END).split())
    print(f"Input sequence: {seq}")

    operation = transform_var.get()
    print(f"Operation: {operation}")

    # Lazily evaluated handlers: only the selected conversion is executed.
    handlers = {
        "transcribe": lambda: transcribe(seq),
        "translate": lambda: translate(seq, table=codon_table, to_stop=True),
        "back transcribe": lambda: back_transcribe(seq),
    }
    handler = handlers.get(operation)
    result = handler() if handler is not None else ""

    output_text.delete(1.0, tk.END)
    output_text.insert(tk.END, result)
    print(f"Result: {result}")
def apply_operation():
    """Run the selected conversion on the input box and show the outcome.

    The codon table, the input sequence (with all whitespace removed) and
    the operation name are read from the module-level widgets and echoed
    to stdout. The output box is then cleared and filled with the result,
    which stays empty when the operation name is not recognised.
    """
    selection = codon_list.curselection()
    codon_table = codon_list.get(selection)
    print('Code: {}'.format(codon_table))

    raw_text = input_text.get(1.0, tk.END)
    seq = ''.join(raw_text.split())
    print('Input sequence: {}'.format(seq))

    operation = transform_var.get()
    print('Operation: {}'.format(operation))

    # Start from the "unknown operation" result and override it if needed.
    result = ''
    if operation == 'transcribe':
        result = transcribe(seq)
    elif operation == 'translate':
        result = translate(seq, table=codon_table, to_stop=True)
    elif operation == 'back transcribe':
        result = back_transcribe(seq)

    output_text.delete(1.0, tk.END)
    output_text.insert(tk.END, result)
    print('Result: {}'.format(result))
    return
# Apply each Bio.Seq string helper to one DNA string and print the results.
from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate

my_string = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG"

# Each helper is called and printed in turn, in the listed order.
for _convert in (reverse_complement, transcribe, back_transcribe, translate):
    print(_convert(my_string))
# NOTE(review): Python 2 syntax (print statements). `unk`, `UnknownSeq`,
# `IUPAC` and `Seq` are expected to be defined earlier in the script.
print unk, len(unk), type(unk)
unkDNA = UnknownSeq(20, alphabet=IUPAC.ambiguous_dna)
print unkDNA  # N = any base
unkProt = UnknownSeq(10, alphabet=IUPAC.protein)
print unkProt  # X = any amino acid
print unkDNA.complement(), unkDNA.reverse_complement()
print unkDNA.transcribe(), unkDNA.translate()
unkProt = unkDNA.translate()
print unkProt, len(unkProt)

# Directly on strings
from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate
noseq = 'GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG'
print reverse_complement(noseq)  # these functions
print transcribe(noseq)  # receive either strings
print back_transcribe(noseq)  # Seq, MutableSeq, UnknownSeq
print translate(noseq)

# SeqRecord object
# .seq                 a Seq object
# .id                  a string ID identifier of the sequence
# .name                a string common name for the sequence
# .description         a string readable description or complete name
# .letter_annotations  a dictionary of additional info about the letters in the sequence.
#                      The values for the keys are (list, tuple, string) with the same
#                      length as the sequence.
# .annotations         a dictionary of additional info about the sequence
# .features            a list of SeqFeature objects
# .dbxrefs             a list of string DB cross-references
from Bio.SeqRecord import SeqRecord
seq = Seq('GATC')
# Derive the reverse complement, mRNA and protein of an insulin DNA string.
from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate  # Question: do we really have to import it like this?

insulin = "GCATTCTGAGGCATTCTCTAACAGGTTCTCGACCCTCCGCCATGGCCCCGTGGATGCATCTCCTCACCGTGCTGGCCCTGCTGGCCCTCTGGGGACCCAACTCTGTTCAGGCCTATTCCAGCCAGCACCTGTGCGGCTCCAACCTAGTGGAGGCACTGTACATGACATGTGGACGGAGTGGCTTCTATAGACCCCACGACCGCCGAGAGCTGGAGGACCTCCAGGTGGAGCAGGCAGAACTGGGTCTGGAGGCAGGCGGCCTGCAGCCTTCGGCCCTGGAGATGATTCTGCAGAAGCGCGGCATTGTGGATCAGTGCTGTAATAACATTTGCACATTTAACCAGCTGCAGAACTACTGCAATGTCCCTTAGACACCTGCCTTGGGCCTGGCCTGCTGCTCTGCCCTGGCAACCAATAAACCCCTTGAATGAG"  # Why can't the editor just turn on word wrap by itself...

# In order: reverse complement / transcription / translation.
insulin_rc = reverse_complement(insulin)
insulin_mrna = transcribe(insulin)
insulin_prot = translate(insulin)

# All three values are computed first, then printed one per line.
for _derived in (insulin_rc, insulin_mrna, insulin_prot):
    print(_derived)
# sequence complement (only if alphabet allows complement) my_seq.complement() # reverse complement (only if alphabet allows complement) my_seq.reverse_complement() # transcribe RNA (DNA -> mRNA) #The actual biological transcription process works from the template strand, doing a reverse complement #(TCAG → CUGA) to give the mRNA. However, in Biopython and bioinformatics in general, we typically #work directly with the coding strand because this means we can get the mRNA sequence just by switching #T → U. from Bio.Seq import transcribe # just changes T with U from the coding strand (5' -> 3') messenger_rna = transcribe(coding_dna) # if we want to transcribe from the template strand (3' -> 5'): transcribe(template_dna.reverse_complement()) # transcribing back to DNA: from Bio.Seq import Seq, back_transcribe back_transcribe(messenger_rna) # just changes U -> T and gives the coding strand # 3.8 Translation (mRNA -> Protein) # Uses standard genetic code from Bio.Seq import Seq, translate from Bio.Alphabet import IUPAC messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna)
coding_dna.transcribe()#T→U coding_dna.reverse_complement().transcribe()#true_transcribe coding_dna.translate(to_stop=True,cds=True)#RNA和DNA都可以直接翻译,table参数可以选择密码子表 from Bio.Data import CodonTable standard_table=CodonTable.unambiguous_dna_by_id[1] mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"] standard_table = CodonTable.unambiguous_dna_by_name["Standard"] mito_table = CodonTable.unambiguous_dna_by_id[2] from Bio.Seq import MutableSeq mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna) ##或者 mutable_seq = my_seq.tomutable() mutable_seq[5] = "C" mutable_seq.remove("T") mutable_seq.reverse() new_seq=mutable_seq.toseq() from Bio.Seq import UnknownSeq unk_dna=UnknownSeq(20,alphabet=IUPAC.ambiguous_dna) from Bio.Seq import reverse_complement,transcribe,back_transcribe,translate my_string = "GCTGTTATGGGTCGTTGGAAGGGTGGTCGTGCTGCTGGTTAG" reverse_complement(my_string) transcribe(my_string) back_transcribe(my_string) translate(my_string)
def get_info(seq):
    """Return ``(GC(seq), transcribe(seq), translate(seq))`` for ``seq``.

    The three helpers are called in that order on the same input.
    """
    gc_content = GC(seq)
    rna = transcribe(seq)
    protein = translate(seq)
    return gc_content, rna, protein
# sequence complement (only if alphabet allows complement) my_seq.complement() # reverse complement (only if alphabet allows complement) my_seq.reverse_complement() # transcribe RNA (DNA -> mRNA) #The actual biological transcription process works from the template strand, doing a reverse complement #(TCAG → CUGA) to give the mRNA. However, in Biopython and bioinformatics in general, we typically #work directly with the coding strand because this means we can get the mRNA sequence just by switching #T → U. from Bio.Seq import transcribe # just changes T with U from the coding strand (5' -> 3') messenger_rna = transcribe(coding_dna) # if we want to transcribe from the template strand (3' -> 5'): transcribe(template_dna.reverse_complement()) # transcribing back to DNA: from Bio.Seq import Seq, back_transcribe back_transcribe( messenger_rna) # just changes U -> T and gives the coding strand # 3.8 Translation (mRNA -> Protein) # Uses standard genetic code from Bio.Seq import Seq, translate from Bio.Alphabet import IUPAC messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna)
# Print the standard codon table, then transcribe and translate a DNA Seq.
from Bio.Seq import Seq, transcribe
from Bio.Data import CodonTable

# Show the standard codon table for reference.
table = CodonTable.unambiguous_dna_by_name["Standard"]
print(table)

# DNA -> RNA
DNA_Data = Seq(
    "GGTCAGAAAAAGCCCTCTCCATGTCTACTCACGATACATCCCTGAAAACCACTGAGGAAG"
)
transcripted_DNA = transcribe(DNA_Data)
print(transcripted_DNA)

# RNA -> protein
translated_Data = transcripted_DNA.translate()
print(translated_Data)
# NOTE(review): `IUPACData`, `Seq` and `IUPAC` are expected to be imported
# earlier in the script.

# Data used to produce the complement.
print(IUPACData.ambiguous_dna_complement)

# GC content
from Bio.SeqUtils import GC
nucleotide = Seq('GACTGACTTCGA', IUPAC.unambiguous_dna)
print("Nucleotide: " + nucleotide)
print("GC content: " + str(GC(nucleotide)))

# Transcription
from Bio.Seq import transcribe
dna_seq = Seq('ATGCCGATCGTAT', IUPAC.unambiguous_dna)
# Fixed: this line printed `nucleotide` under the "dna_seq" label, which
# looks like a copy-paste slip from the GC-content section.
print("dna_seq : " + dna_seq)
print("transcibed: " + transcribe(dna_seq))
rna_seq = transcribe(dna_seq)
print("rna_seq : " + rna_seq)

# Back to DNA, then derive the reverse complement of that DNA.
rna_seq_back_transcribed = rna_seq.back_transcribe()
print("rna_seq.bt: " + rna_seq_back_transcribed)
dna_template = rna_seq_back_transcribed.reverse_complement()
print("DNA templa:" + dna_template)

seqt = Seq('GAGATC', IUPAC.unambiguous_dna)
print(transcribe(seqt))

# Translation of an RNA sequence.
rna_seq = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGA", IUPAC.unambiguous_rna)
print(rna_seq)
print(rna_seq.translate())
def transcribe(self):
    """Transcribe the source text box content into the destination box.

    All whitespace is stripped from ``self.src_text`` before the sequence
    is echoed to stdout and handed to the module-level ``transcribe``
    function (this method's name does not shadow it inside the body).
    """
    seq = "".join(self.src_text.GetValue().split())  # remove whitespace
    # Parenthesised form prints the same text on Python 2 and is also
    # valid on Python 3, where the bare `print seq` is a SyntaxError.
    print(seq)
    self.dest_text.Clear()
    self.dest_text.SetValue(transcribe(seq))
def main():
    """Run the Streamlit "Transcription and Translation" app.

    The sidebar offers three activities:

    * "Menu" - introductory text and an image.
    * "Protein Synthesis" - transcribes and translates a DNA string typed
      by the user.
    * "Upload FASTA File" - reads one FASTA record and offers checkboxes
      for details, length, translation, three-letter codes and amino acid
      statistics.
    """
    st.title("Transcription and Translation")
    menu = ["Menu", "Protein Synthesis", "Upload FASTA File"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "Menu":
        # Fixed: the last two fragments were joined without a space and
        # rendered as "code forthe production".
        st.write(
            "Transcription and translation take the information"
            " in DNA and use it to produce proteins."
            " Translation is the process where the information carried in "
            "mRNA molecules is used to create proteins. The specific "
            "sequence of nucleotides in the mRNA molecule provide the code for "
            "the production of a protein with a specific sequence of amino acids."
        )
        st.image("dnatr.gif", use_column_width=True)
        st.subheader("Use the sidebar to select activity")

    elif choice == "Protein Synthesis":
        st.subheader("Convert DNA to mRNA or to Protein sequence")
        dna_seq = st.text_input("Enter DNA sequence")
        if dna_seq:
            mRNA = transcribe(dna_seq)
            st.write("Transcribed Sequence is: ", mRNA)
            protein_seq = translate(dna_seq)
            st.write("Translated Sequence is: ", protein_seq)
        st.image("dna.jpg", use_column_width=True)

    elif choice == "Upload FASTA File":
        st.subheader("Protein sequence")
        sequence_file = st.file_uploader("Upload FASTA file",
                                         type=["fasta", "fa", "txt"])
        if sequence_file:
            dna = SeqIO.read(sequence_file, "fasta")
            if st.checkbox("Details of sequence"):
                st.write(dna)
            if st.checkbox("Length of sequence"):
                st.write("length of dna sequence: ", len(dna))

            dna_seq = dna.seq
            translated_dna = dna_seq.translate()
            if st.checkbox(
                    "Translation: Each row representing an amino acid sequence"
            ):
                # Split amino acids before stop codon;
                # a stop codon terminates translation.
                AA = translated_dna.split('*')
                protein_sequence = [str(i) for i in AA]
                st.write(protein_sequence)

            # Change to 3 letter amino acids instead of 1 letter AA
            if st.checkbox("View 3 letter Amino Acids"):
                st.write(seq3(translated_dna))

            # Amino acid count
            AA_analysed = ProteinAnalysis(str(translated_dna))
            AA_freq = AA_analysed.count_amino_acids()
            if st.checkbox("Amino acid Count"):
                st.write(AA_analysed.get_amino_acids_percent())

            # Visualize the amino acid count
            if st.checkbox("Visualize Amino Acid count"):
                plt.bar(AA_freq.keys(),
                        AA_freq.values(),
                        color="salmon",
                        edgecolor="black")
                st.pyplot()
print(format_fast_string) # Zusammenfügen bzw. Konkatenieren von Sequenzen # ACHTUNG BEIM ARBEITEN von unterschiedlichen Seqs Typ check dna_seq = Seq("ACGTA") protein_seq = Seq("EVRNAK") print ("Sum: ", protein_seq + dna_seq) print(dna_seq) print(dna_seq.complement()) print(dna_seq.reverse_complement()) # Transcription and Tanslation coding_dna = Seq("ATGGCCATTGTAATG") template_dna = coding_dna.reverse_complement() messenger_rna = transcribe(coding_dna) print(messenger_rna) print(back_transcribe(messenger_rna)) print(translate(messenger_rna)) myThirdSequence = Seq("GATCGATGGGGGCTATCC") print(GC(myThirdSequence)) # MutableSeq objects print(dna_seq) #dna_seq[0]="T" --> Nicht veränderbar! mutable_seq = dna_seq.tomutable() mutable_seq[0] = "T" print(mutable_seq)
def transcribe(self):
    """Transcribe the source text box content into the destination box.

    All whitespace is stripped from ``self.src_text`` before the sequence
    is echoed to stdout and handed to the module-level ``transcribe``
    function (this method's name does not shadow it inside the body).
    """
    seq = "".join(self.src_text.GetValue().split())  # remove whitespace
    # Parenthesised form prints the same text on Python 2 and is also
    # valid on Python 3, where the bare `print seq` is a SyntaxError.
    print(seq)
    self.dest_text.Clear()
    self.dest_text.SetValue(transcribe(seq))
# Print every codon table id (1-14, skipping 7 and 8) under which the DNA on
# the first line of the input file translates to the protein on the second.
#
# Usage: python <script> <input-file>
from sys import argv
from Bio.Seq import translate
from Bio.Seq import transcribe

# The `with` block closes the file; the former bare `in_file.closed`
# expression had no effect and was removed.
with open(argv[1], 'r') as in_file:
    in_data = in_file.read().splitlines()

dna = in_data[0]
protein = in_data[1]
rna = transcribe(dna)

for i in range(1, 15):
    # Ids 7 and 8 are skipped -- presumably because no codon table with
    # those ids exists; verify against the Biopython codon table list.
    if i == 7 or i == 8:
        continue
    if protein == translate(rna, table=i, stop_symbol='', to_stop=False):
        # Parenthesised so the script also runs on Python 3.
        print(i)