Example #1
0
def main():
    """Render the Trisakti bioinformatics Streamlit application.

    The sidebar select box chooses one of four pages:

    * ``Intro`` - static introduction text and an image.
    * ``DNA Sequence`` - upload one FASTA record and show its description or
      sequence, nucleotide frequencies (optionally plotted), GC/AT content,
      a count of a user-entered substring, and protein-synthesis views.
    * ``DotPlot`` - upload two FASTA records and either draw a dot plot or
      report a global-alignment similarity percentage for their prefixes.
    * ``About`` - static project and contact text.

    The function takes no arguments and returns ``None``; everything it
    produces is written to the Streamlit page.  It relies on names imported
    elsewhere in the file (``st``, ``Image``, ``SeqIO``, ``Counter``, ``plt``,
    ``utils``, ``pairwise2``, ``dotplotx``).
    """
    image = Image.open('b2.png')
    st.image(image, width=200)
    st.title("Trisakti Bioinformatics Application")
    st.title("Powered by Python")

    menu = ["Intro", "DNA Sequence", "DotPlot", "About"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "Intro":
        st.subheader("Intro to BioInformatics")
        image = Image.open('dna.png')
        st.image(image, width=800)

        st.subheader("Bioinformatics")
        st.subheader(
            "Bioinformatika adalah ilmu yang mempelajari penerapan teknik komputasional untuk mengelola dan menganalisis informasi biologis. Bidang ini mencakup penerapan metode matematika, statistika, dan informatika untuk memecahkan masalah-masalah biologis, terutama dengan menggunakan sekuens DNA dan asam amino serta informasi yang berkaitan dengannya. Contoh topik utama bidang ini meliputi basis data untuk mengelola informasi biologis, penyejajaran sekuens (sequence alignment), prediksi struktur untuk meramalkan bentuk struktur protein maupun struktur sekunder RNA, analisis filogenetik, dan analisis ekspresi gen"
        )

        st.subheader("DNA")
        st.subheader(
            "Asam deoksiribonukleat, lebih dikenal dengan singkatan DNA (bahasa Inggris: deoxyribonucleic acid), adalah sejenis biomolekul yang menyimpan dan menyandi instruksi-instruksi genetika setiap organisme dan banyak jenis virus. Instruksi-instruksi genetika ini berperan penting dalam pertumbuhan, perkembangan, dan fungsi organisme dan virus. DNA merupakan asam nukleat; bersamaan dengan protein dan karbohidrat, asam nukleat adalah makromolekul esensial bagi seluruh makhluk hidup yang diketahui. Kebanyakan molekul DNA terdiri dari dua unting biopolimer yang berpilin satu sama lainnya membentuk heliks ganda. Dua unting DNA ini dikenal sebagai polinukleotida karena keduanya terdiri dari satuan-satuan molekul yang disebut nukleotida. Tiap-tiap nukleotida terdiri atas salah satu jenis basa nitrogen (guanina (G), adenina (A), timina (T), atau sitosina (C)), gula monosakarida yang disebut deoksiribosa, dan gugus fosfat. Nukleotida-nukelotida ini kemudian tersambung dalam satu rantai ikatan kovalen antara gula satu nukleotida dengan fosfat nukelotida lainnya. Hasilnya adalah rantai punggung gula-fosfat yang berselang-seling. Menurut kaidah pasangan basa (A dengan T dan C dengan G), ikatan hidrogen mengikat basa-basa dari kedua unting polinukleotida membentuk DNA unting ganda."
        )

    elif choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")

        seq_file = st.file_uploader("Upload FASTA File",
                                    type=["fasta", "fa", "txt"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequency
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("Thymine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            cytosil_color = st.beta_color_picker("Cytosil Color")

            if st.button("Plot Frequency"):
                # Colour every bar by the symbol it stands for.  Counter
                # keeps first-seen order, so positional indices would paint
                # the wrong bars (or raise IndexError when fewer than four
                # distinct symbols occur in the sequence).
                base_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                bases = list(dna_freq.keys())
                barlist = plt.bar(bases, [dna_freq[base] for base in bases])
                for base, bar in zip(bases, barlist):
                    color = base_colors.get(str(base).upper())
                    # Symbols other than A/T/G/C keep the default colour.
                    if color is not None:
                        bar.set_color(color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content": gc_score, "AT Content": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is ::{}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # NOTE: because this is an if/elif chain, a checkbox is only
            # created when every checkbox before it is unticked, and only
            # the first ticked option is rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())

            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())

            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())

            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)

            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are removed before converting names.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("=========================")
                st.write(aa3)

                st.write("=========================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "DotPlot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                     type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads must be present; `a and b is not None` would only
        # null-check the second one.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record1.description)
                st.write("=====================")
                st.write(dna_record2.description)
            elif details == "Sequence":
                st.write(dna_record1.seq)
                st.write("=====================")
                st.write(dna_record2.seq)
            cus_limit = st.number_input("Select Max number of Nucleotide", 100,
                                        40000, 10000)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing the first {} Nucleotide of the Two Sequences".
                    format(cus_limit))
                dotplotx(dna_seq1[0:cus_limit], dna_seq2[0:cus_limit])

                st.pyplot()
            elif st.button("Similarity"):
                st.write(
                    "Similarity of Comparing the first {} Nucleotide of the Two Sequences"
                    .format(cus_limit))
                seq1_part = dna_seq1[0:cus_limit]
                seq2_part = dna_seq2[0:cus_limit]
                if len(seq1_part) == 0:
                    # Avoid dividing by zero on an empty first sequence.
                    st.warning(
                        "The first sequence is empty; similarity cannot be computed."
                    )
                else:
                    r = pairwise2.align.globalxx(seq1_part,
                                                 seq2_part,
                                                 one_alignment_only=True,
                                                 score_only=True)
                    # Alignment score as a percentage of the first
                    # sequence's length; it is now actually shown instead
                    # of being computed and thrown away.
                    similarity = r / len(seq1_part) * 100
                    st.write("Similarity: {:.2f} %".format(similarity))

    elif choice == "About":
        st.subheader(
            "Website ini dalam tahap pengembangan & digunakan untuk project penelitian."
        )
        st.subheader("contact : hafiz065001600009.trisakti.ac.id")
Example #2
0
def main():
    """Render the multi-page bioinformatics Streamlit application.

    The sidebar select box chooses one of six pages:

    * ``Intro`` - static text and an overview image.
    * ``SequenceAnalysis`` - upload one FASTA record and show its
      description or sequence, nucleotide frequencies (optionally plotted),
      GC/AT content, a count of a user-entered substring, and
      protein-synthesis views.
    * ``ProteinSearch`` - query the EBI Proteins API for a gene and list the
      citation titles of the first hit that contain the optional specifier.
    * ``DotPlot`` - upload two FASTA records and draw a dot plot of their
      prefixes.
    * ``MoleculeVisualizer`` - delegate to ``component_3dmol()``.
    * ``ChemicalSearch`` - look a compound name up and print its properties.

    The function takes no arguments and returns ``None``; everything it
    produces is written to the Streamlit page.  It relies on names imported
    elsewhere in the file (``st``, ``Image``, ``SeqIO``, ``Counter``, ``plt``,
    ``utils``, ``req``, ``dotplotx``, ``component_3dmol``, ``pcp``,
    ``Compound``).
    """
    # Local import: only the ProteinSearch page needs it and the file's
    # top-level import block is not visible from here.
    from urllib.parse import quote

    max_titles = 10  # upper bound on citation titles listed per search

    st.title("Bioinformatics App")
    st.set_option('deprecation.showfileUploaderEncoding', False)

    activity = [
        'Intro', 'SequenceAnalysis', 'DotPlot', 'ProteinSearch',
        "MoleculeVisualizer", "ChemicalSearch"
    ]
    choice = st.sidebar.selectbox("Select Activity", activity)
    if choice == 'Intro':
        st.subheader("Intro")
        st.write(
            """ This is a bioinformatics web app made with Python and Streamlit. Use the left panel dropdown to choose the various features to use."""
        )
        image = Image.open("overviewpicture.png")
        st.image(image, use_column_width=True)

    elif choice == "SequenceAnalysis":
        st.subheader("DNA Sequence Analysis")

        seq_file = st.file_uploader("Upload FASTA File", type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequencies
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("thymine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            cytosil_color = st.beta_color_picker("cytosil Color")

            if st.button("Plot Freq"):
                # Colour every bar by the symbol it stands for.  Counter
                # keeps first-seen order, so positional indices would paint
                # the wrong bars (or raise IndexError when fewer than four
                # distinct symbols occur in the sequence).
                base_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                bases = list(dna_freq.keys())
                barlist = plt.bar(bases, [dna_freq[base] for base in bases])
                for base, bar in zip(bases, barlist):
                    color = base_colors.get(str(base).upper())
                    # Symbols other than A/T/G/C keep the default colour.
                    if color is not None:
                        bar.set_color(color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content": gc_score, "AT Content": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is ::{}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # NOTE: because this is an if/elif chain, a checkbox is only
            # created when every checkbox before it is unticked, and only
            # the first ticked option is rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())

            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())

            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())

            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)

            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are removed before converting names.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("=====================")
                st.write(aa3)

                st.write("=====================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "ProteinSearch":
        st.subheader("Search for Papers Related to a Protein")
        st.write(""" Try entering ACE2 and coronavirus!""")

        ace2 = st.text_input("Query Protein")
        disease = st.text_input(
            "Query Specifier (more specific thing to narrow down papers with)")

        # Text inputs yield strings, so only the protein needs a truthiness
        # check; an empty specifier matches every title, as before.
        if ace2:
            protein = req.get(
                'https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=10&gene='
                + quote(ace2, safe='') + '&organism=homo%20sapiens',
                headers={'Accept': "application/json"})

            # Decode the body once instead of once per reference.
            try:
                entries = protein.json()
            except ValueError:
                entries = []

            references = []
            if isinstance(entries, list) and entries and isinstance(
                    entries[0], dict):
                references = entries[0].get('references') or []

            if not references:
                st.warning("No references found for this protein.")

            # The counter lives outside the loop so the cap really applies.
            shown = 0
            for reference in references:
                if shown >= max_titles:
                    break
                if not isinstance(reference, dict):
                    continue
                citation = reference.get('citation')
                if not isinstance(citation, dict):
                    continue
                title = citation.get('title')
                # Some citations carry no title; skip them quietly.
                if not isinstance(title, str):
                    continue
                if disease in title:
                    st.write(title)
                    shown += 1

    elif choice == "DotPlot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                     type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads must be present; `a and b is not None` would only
        # null-check the second one.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record1.description)
                st.write("=====================")
                st.write(dna_record2.description)
            elif details == "Sequence":
                st.write(dna_record1.seq)
                st.write("=====================")
                st.write(dna_record2.seq)

            cus_limit = st.number_input("Select Max number of Nucleotide", 10,
                                        200, 50)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing the first {} Nucleotide of the Two Sequences".
                    format(cus_limit))
                dotplotx(dna_seq1[0:cus_limit], dna_seq2[0:cus_limit])

                st.pyplot()

    elif choice == "MoleculeVisualizer":
        st.subheader(
            "Look at a molecule! Pre-loaded example is the Covid-19 Spike Protein. Thank you to: https://github.com/napoles-uach/streamlit_3dmol"
        )

        component_3dmol()

    elif choice == "ChemicalSearch":
        st.title(
            "Search for chemicals and get info. Pre-loaded example: imatinib")
        user_compound = st.text_input("Enter compound name", 'imatinib')
        # A text input is never None; skip the lookup for blank input.
        if user_compound and user_compound.strip():
            results = pcp.get_compounds(user_compound, 'name')
            for compound in results:
                # str() guards against properties that come back as None,
                # which would break plain string concatenation.
                st.write('Compound ID: ' + str(compound.cid))
                st.write('SMILES: ' + str(compound.isomeric_smiles))

                vioxx = Compound.from_cid(compound.cid)
                st.write('Molecular Formula: ' + str(vioxx.molecular_formula))
                st.write('Molecular Weight: ' + str(vioxx.molecular_weight))
                st.write('IUPAC Name: ' + str(vioxx.iupac_name))
                st.write('xlogp value: ' + str(vioxx.xlogp))
Example #3
0
def main():

    st.title("Bioinformatik Uygulaması")

    st.sidebar.header("Lütfen Seçim Yapınız")
    secenekler = ["Giriş","DNA Sekansı", "Çoklu Sekans Hizalama", "Sekans Puanlaması", "Hedef Protein Analizi", "Protein Çözünürlüğü"]
    select_box = st.sidebar.selectbox("Yapmak istediğiniz işlem: ", secenekler)

    if select_box == "Giriş":
        st.header("Burası giriş sayfasıdır")
        st.text("")
        st.write("Buraya site hakkında genel bilgilendirmeler girilecektir.")
        st.write("lorem ipsum dolor.")
    elif select_box == "DNA Sekansı":
        st.subheader("DNA Sekans Analizi")

        st.warning("Lütfen DNA Sekansınızı sol taraftaki barda bulunan dosya yükleme kısmından içeri aktarınız.")

        seq_dosya = st.sidebar.file_uploader("Lütfen FASTA yapılı dosyanızı girin", type=["FASTA","fa"])
        
        if seq_dosya is not None:
            dna = SeqIO.read(seq_dosya, "fasta")
            st.write(dna)

            dna_sekansi = dna.seq

            details = st.radio("Detaylar", ("Açıklama", "Sekansı Göster"))
            if details == "Açıklama":
                st.text("")
                st.write(dna.description)
            elif details == "Sekansı Göster":
                st.text("")
                st.write(dna.seq)

            st.text("")
            st.text("")

            st.subheader("Nükleotid bilgisi")

            st.text("")


            if ("M" and "L") in str(dna_sekansi):
                st.write("Nükleotid bilgilerinin hesaplanabilmesi için lütfen bir **DNA sekansı** giriniz!")
                

            else:
                
                adenin = int(str(dna_sekansi).count("A"))
                guanin = int(str(dna_sekansi).count("G"))
                citosin = int(str(dna_sekansi).count("C"))
                timin = int(str(dna_sekansi).count("T"))
                st.write("**Adenin** sayısı = {0} ".format(adenin))
                st.write("**Timin** sayısı = {0} ".format(timin))
                st.write("**Guanin** sayısı = {0} ".format(guanin))
                st.write("**Sitozin** sayısı = {0} ".format(citosin))

                st.text("")
                st.text("")

                if st.checkbox("Grafik üzerinde göster"):
                    adenin_color = st.beta_color_picker('Adenin için renk seçin', "#F50000")
                    timin_color = st.beta_color_picker('Timin için renk seçin', "#00DE2D")
                    guanin_color = st.beta_color_picker('Guanin için renk seçin', "#1A00FF")
                    sitozin_color = st.beta_color_picker('Sitozin için renk seçin', "#000000")
                    
                    numbers = [adenin, timin, guanin, citosin]
                    plot(numbers, adenin_color, timin_color, guanin_color, sitozin_color)
                
                st.text("")

                st.subheader("İçerik oranları")
                st.text("")

                gc_orani = round(gc_content(dna_sekansi), 2)
                at_orani = round(at_content(dna_sekansi),2)

                st.write("**GC** oranı = % {0}".format(gc_orani))
                st.write("**AT** oranı = % {0}".format(at_orani))

                st.text("")

                st.subheader("Protein Sentezi")
                aa = dna_sekansi.translate()
                aa_frekansi = Counter(str(aa))
                st.text("")

                if st.button("Translasyon Tablosunu Görmek için Tıklayınız"):
                    standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
                    st.text(standard_table)
                
                st.text("")

                if st.checkbox("Transkripsiyon"):
                    transkribe = dna_sekansi.transcribe()
                    st.write(transkribe)
                elif st.checkbox("Translasyon"):
                    transle = dna_sekansi.translate()
                    st.write(transle)
                elif st.checkbox("Complement"):
                    st.write(dna_sekansi.complement())
                elif st.checkbox("Amino Asit Sıklığı"):
                    st.write(aa_frekansi)
                elif st.checkbox("Amino Asit Grafiği"):
                    st.text("")
                    aa_color = st.beta_color_picker("Buradan Renk Değiştirebilirsin")
                    plt.bar(aa_frekansi.keys(), aa_frekansi.values(), color = aa_color)
                    st.pyplot()
                elif st.checkbox("Tam Amino Asit İsimleri"):
                    st.text("")
                    aa_ismi = str(aa).replace("*", "")
                    aa3 = utils.convert_1to3(aa_ismi)
                    st.write("**Harf Gösterimi**")
                    st.text(aa_ismi)
                    st.write("**************************************")
                    st.write("**Kısaltma Gösterimi**")
                    st.text(aa3)
                    st.write("**************************************")
                    st.write("**Açık İsim Gösterimi**")
                    st.text(utils.get_acid_name(aa3))              

    elif select_box == "Çoklu Sekans Hizalama":

        st.warning("Lütfen karşılaştırma yapacağınız sekansları sol taraftaki barda bulunan dosya yükleme kısmından içeri aktarınız.")
        seq1 = st.sidebar.file_uploader("1.FASTA dosyanızı giriniz", type=["fasta", "fa"])

        seq2 = st.sidebar.file_uploader("2.FASTA dosyanızı giriniz", type=["fasta", "fa"])

        if seq1 and seq2 is not None:
            sekans1 = SeqIO.read(seq1, "fasta")
            sekans2 = SeqIO.read(seq2, "fasta")

            st.text("")

            st.write("**1.Sekans** = {0}".format(sekans1.description))
            st.text("")
            st.write("**2.Sekans** = {0}".format(sekans2.description))
            st.text("")
            st.write("**Hizalanmış Durum:**")

            alignments = pairwise2.align.globalxx(sekans1.seq, sekans2.seq)
            st.text(pairwise2.format_alignment(*alignments[0]))
            st.text("")

            if st.checkbox("BLOSUM62 Puanlamasını Görmek için tıklayınız"):

                secenek = st.radio("Seçenekler",("Hizalanmış Sekans Gösterimi","Tüm Sekans Gösterimi"))

                if secenek == "Hizalanmış Sekans Gösterimi":

                    blosum62 = substitution_matrices.load("BLOSUM62")
                    alignment_blossum = pairwise2.align.localds(sekans1.seq, sekans2.seq, blosum62, -10, -1)
                    st.text(pairwise2.format_alignment(*alignment_blossum[0]))
                    
                elif secenek == "Tüm Sekans Gösterimi":
                    blosum62_2 = substitution_matrices.load("BLOSUM62")
                    full_alignment_blossum = pairwise2.align.localds(sekans1.seq, sekans2.seq, blosum62_2, -10, -1)
                    st.text(pairwise2.format_alignment(*full_alignment_blossum[0], full_sequences=True))

                  #Bu kısımda geliştirme olarak gap-penalty ve gap-extension gibi değerleri kullanıcının değiştirebileceği gibi ayarlayabiliriz. 
                  #Geliştirme olarak sekansları taşımak yerine yazılabilir hale de getirebilirim!!!!!

    elif select_box == "Sekans Puanlaması":

        
        st.text("")
        st.text("")
        st.text("")
        st.subheader("Kendi Hesaplamanızı Yapın:")
        
        seq1_puan = st.text_area("1.Sekans")
        seq2_puan = st.text_area("2.Sekans")
        substitution_matrices.load()
        option = st.selectbox('Kullanmak İstediğiniz Matrix?',("BENNER22", 'BENNER6', 'BENNER74', 'BLOSUM45', 'BLOSUM50', 'BLOSUM62', 'BLOSUM80', 'BLOSUM90', 'DAYHOFF', 'FENG', 'GENETIC', 'GONNET1992', 'JOHNSON', 'JONES', 'LEVIN', 'MCLACHLAN', 'MDM78', 'NUC.4.4', 'PAM250', 'PAM30', 'PAM70', 'RAO', 'RISLER', 'SCHNEIDER', 'STR'))
        st.write('Seçtiğiniz Matrix:', option)
        try:        
            aligner = Align.PairwiseAligner()
            if option == "BENNER22":
                matrix = substitution_matrices.load("BENNER22")
                st.text(matrix)
                aligner.substitution_matrix = matrix
                score = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score)) 
            elif option == "BLOSUM62":
                matrix2 = substitution_matrices.load("BLOSUM62")
                st.text(matrix2)
                aligner.substitution_matrix = matrix2
                score2 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score2))
            elif option == "BENNER6":
                matrix3 = substitution_matrices.load("BENNER6")
                st.text(matrix3)
                aligner.substitution_matrix = matrix3
                score3 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score3))
            elif option == "BENNER74":
                matrix4 = substitution_matrices.load("BENNER74")
                st.text(matrix4)
                aligner.substitution_matrix = matrix4
                score4 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score4))
            elif option == "BLOSUM45":
                matrix5 = substitution_matrices.load("BLOSUM45")
                st.text(matrix5)
                aligner.substitution_matrix = matrix5
                score5 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score5))
            elif option == "BLOSUM50":
                matrix6 = substitution_matrices.load("BLOSUM50")
                st.text(matrix6)
                aligner.substitution_matrix = matrix6
                score6 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score6))
            elif option == "BLOSUM80":
                matrix7 = substitution_matrices.load("BLOSUM80")
                st.text(matrix7)
                aligner.substitution_matrix = matrix7
                score7 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score7))
            elif option == "BLOSUM90":
                matrix8 = substitution_matrices.load("BLOSUM90")
                st.text(matrix8)
                aligner.substitution_matrix = matrix8
                score8 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score8))
            elif option == "DAYHOFF":
                matrix9 = substitution_matrices.load("DAYHOFF")
                st.text(matrix9)
                aligner.substitution_matrix = matrix9
                score9 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score9))
            elif option == "FENG":
                matrix10 = substitution_matrices.load("FENG")
                st.text(matrix10)
                aligner.substitution_matrix = matrix10
                score10 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score10))
            elif option == "GENETIC":
                matrix11 = substitution_matrices.load("GENETIC")
                st.text(matrix11)
                aligner.substitution_matrix = matrix11
                score11 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score11))
            elif option == "GONNET1992":
                matrix12 = substitution_matrices.load("GONNET1992")
                st.text(matrix12)
                aligner.substitution_matrix = matrix12
                score12 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score12))
            elif option == "JOHNSON":
                matrix13 = substitution_matrices.load("JOHNSON")
                st.text(matrix13)
                aligner.substitution_matrix = matrix13
                score13 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score13))
            elif option == "JONES":
                matrix14 = substitution_matrices.load("JONES")
                st.text(matrix14)
                aligner.substitution_matrix = matrix14
                score14 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score14))
            elif option == "LEVIN":
                matrix15 = substitution_matrices.load("LEVIN")
                st.text(matrix15)
                aligner.substitution_matrix = matrix15
                score15 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score15))
            elif option == "MCLACHLAN":
                matrix16 = substitution_matrices.load("MCLACHLAN")
                st.text(matrix16)
                aligner.substitution_matrix = matrix16
                score16 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score16))
            elif option == "MDM78":
                matrix17 = substitution_matrices.load("MDM78")
                st.text(matrix17)
                aligner.substitution_matrix = matrix17
                score17 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score17))
            elif option == "NUC.4.4":
                matrix18 = substitution_matrices.load("NUC.4.4")
                st.text(matrix18)
                aligner.substitution_matrix = matrix18
                score18 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score18))
            elif option == "PAM250":
                matrix19 = substitution_matrices.load("PAM250")
                st.text(matrix19)
                aligner.substitution_matrix = matrix19
                score19 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score19))
            elif option == "PAM30":
                matrix20 = substitution_matrices.load("PAM30")
                st.text(matrix20)
                aligner.substitution_matrix = matrix20
                score20 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score20))
            elif option == "PAM70":
                matrix21 = substitution_matrices.load("PAM70")
                st.text(matrix21)
                aligner.substitution_matrix = matrix21
                score21 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score21))
            elif option == "RAO":
                matrix22 = substitution_matrices.load("RAO")
                st.text(matrix22)
                aligner.substitution_matrix = matrix22
                score22 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score22))
            elif option == "RISLER":
                matrix23 = substitution_matrices.load("RISLER")
                st.text(matrix23)
                aligner.substitution_matrix = matrix23
                score23 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score23))
            elif option == "SCHNEIDER":
                matrix24 = substitution_matrices.load("SCHNEIDER")
                st.text(matrix24)
                aligner.substitution_matrix = matrix24
                score24 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score24))
            elif option == "STR":
                matrix25 = substitution_matrices.load("STR")
                st.text(matrix25)
                aligner.substitution_matrix = matrix25
                score25 = aligner.score(seq1_puan, seq2_puan)
                st.write("**Sekans Puanı** = {0} ".format(score25))
            
            #aligner.substitution_matrix = matrix
            #score = aligner.score("ACDQ", "ACDQ")
            #st.write(score)
        except:
            st.text("")

    elif select_box == "Hedef Protein Analizi":
        try:
            st.text("")
            arama = st.sidebar.text_input("Aramak İstediğiniz Proteini Yazınız.", "coronavirus")
            target = new_client.target
            target_query = target.search(arama) #Your target protein name that you want to search
            targets = pd.DataFrame.from_dict(target_query)
            st.write("**Chembl Verisi**:")
            st.write(targets)
            st.text("")
            
            hedef_protein = st.number_input("İleri araştırma yapmak istediğiniz Single Proteinin solunda yazan numarayı girin.", min_value=0 ,value=4, format="%d")
            selected_target = targets.target_chembl_id[hedef_protein]
            st.text("")
            st.write("Seçtiğiniz proteinin **ChEMBL ID**'si: {0}".format(selected_target))
            activity = new_client.activity
            res = activity.filter(target_chembl_id=selected_target).filter(standard_type="IC50")
            df = pd.DataFrame.from_dict(res)
            
            st.text("")
        except:
            st.warning("Girilen Değer Geçersiz")
        

        if hedef_protein is not None:
            

            if st.checkbox("Seçtiğiniz Proteinin IC50 değerlerine göre elde edilen verisini görüntülemek için tıklayınız."):
                if df.empty:
                    st.warning("Lütfen bir Single Protein seçiniz!")
                else:
                        
                    st.write(df)
                    st.text("")
                
                    st.markdown(download_button(df, 'IC50_Veri.csv', 'CSV Dosyasını Bilgisayarına Yükle'), unsafe_allow_html=True)
            
            
            st.text("")
            st.text("")
            st.markdown("<h3 style='text-align: center; color: red;'>Seçilen Protein için molekül aktivitesini hesaplayan ML programını çalıştırmak isterseniz aşağıda ki butona tıklayınız.</h3>", unsafe_allow_html=True)
            st.text("")
            
            df2 = df[df.standard_value.notna()]
            bioactivity_class = []

            mol_cid = []
            canonical_smiles = []
            standard_value = []
            for unit in df2.standard_value:
                if float(unit) >= 10000:
                    bioactivity_class.append("inactive")
                elif float(unit) <= 1000:
                    bioactivity_class.append("active")
                else:
                    bioactivity_class.append("intermediate")

            for i in df2.molecule_chembl_id:

                mol_cid.append(i)
            
            for i in df2.canonical_smiles:

                canonical_smiles.append(i)
                
            for i in df2.standard_value:

                standard_value.append(i)
                    
                    

            data_tuples = list(zip(mol_cid, canonical_smiles, standard_value, bioactivity_class))
            df3 = pd.DataFrame( data_tuples,  columns=['molecule_chembl_id', 'canonical_smiles', 'standard_value','bioactivity_class' ])
            st.text("")
            if df.empty:
                st.warning("Lütfen bir Single Protein seçiniz!")
            else:
                if st.checkbox("Moleküler Aktivite Hesapla"):
                    st.text("")
                    st.text("")
                    st.write(df3)
                    st.text("")
                    st.markdown(download_button(df3, 'Genel_Veri.csv', 'CSV Dosyasını Bilgisayarına Yükle'), unsafe_allow_html=True)

                    st.text("")
                    if st.selectbox("Yalnızca Aktif Olanları Göster",("Aktif","")):
                        active_data = (df3.loc[df3['bioactivity_class'] == "active"])
                        st.write(active_data)
                        st.text("")
                        st.markdown(download_button(active_data, 'Aktif_Veri.csv', 'CSV Dosyasını Bilgisayarına Yükle'), unsafe_allow_html=True)

            
                st.text("")
                st.text("")
                st.markdown("<h3 style='text-align: center; color: red;'>Lipinski Tanımlayıcılarını Hesaplamak için aşağıdaki butona tıklayınız.</h3>", unsafe_allow_html=True)
                st.text("")
                
                button_sent = st.checkbox("Lipinski Tanımlayıcıları")
                
                if button_sent:
                    session_state.button_sent = True

                if session_state.button_sent:
                    st.subheader("Lipinski Verisi:")
                    st.write("**MW** = Moleküler Ağırlık")
                    st.write("**LogP** = Molekül Çözünürlüğü")
                    st.write("**NumHDonors** = Hidrojen Bağı Vericileri")
                    st.write("**NumHAcceptors** = Hidrojen Bağı Alıcıları")
                    exploratory_data = df3
                    df_lipinski = lipinski(exploratory_data.canonical_smiles)
                    #st.write(df_lipinski)
                    df_combined = pd.concat([exploratory_data,df_lipinski], axis=1)
                    st.subheader("Birleştirilmiş Veri:")
                    st.write(df_combined)
                    st.markdown(download_button(df_combined, 'Birleştirilmiş_Veri.csv', 'CSV Dosyasını Bilgisayarına Yükle'), unsafe_allow_html=True)
                    df_norm = norm_value(df_combined)
                    #st.write(df_norm)
                    df_final = pIC50(df_norm)
                    st.subheader("IC50'nin pIC50'ye dönüştürülmüş halindeki veri seti:")
                    st.write(df_final)
                    st.markdown(download_button(df_final, 'pIC50_Verisi.csv', 'CSV Dosyasını Bilgisayarına Yükle'), unsafe_allow_html=True)
                    df_class = df_final[df_final.bioactivity_class != "intermediate"]

                    def mannwhitney(descriptor, verbose=False):
                        """Compare one descriptor between active and inactive molecules.

                        Splits the enclosing scope's ``df_class`` frame by its
                        ``bioactivity_class`` column, runs ``mannwhitneyu`` on the
                        ``descriptor`` column of the two groups, writes the one-row
                        result table to ``mannwhitneyu_<descriptor>.csv`` in the
                        working directory, and returns that table.

                        Args:
                            descriptor: Name of the ``df_class`` column to test.
                            verbose: Accepted but not used by this function.

                        Returns:
                            A single-row ``DataFrame`` with the columns Descriptor,
                            Statistics, p, alpha and Interpretation.
                        """
                        # Re-seed the random number generator on every call, as before.
                        seed(1)

                        # Keep only the tested column and the class label, then split
                        # the rows into the two groups being compared.
                        subset = df_class[[descriptor, 'bioactivity_class']]
                        labels = subset.bioactivity_class
                        active_values = subset[labels == 'active'][descriptor]
                        inactive_values = subset[labels == 'inactive'][descriptor]

                        stat, p_value = mannwhitneyu(active_values, inactive_values)

                        # Interpret the p-value against a fixed 5% significance level.
                        alpha = 0.05
                        interpretation = (
                            'Same distribution (fail to reject H0)'
                            if p_value > alpha
                            else 'Different distribution (reject H0)'
                        )

                        results = pd.DataFrame(
                            {
                                'Descriptor': descriptor,
                                'Statistics': stat,
                                'p': p_value,
                                'alpha': alpha,
                                'Interpretation': interpretation,
                            },
                            index=[0],
                        )
                        # Side effect: the result is also persisted next to the app.
                        results.to_csv('mannwhitneyu_' + descriptor + '.csv')

                        return results

                    st.text("")
                    st.text("")
                    session_state.grafik = st.checkbox("Aktif/İnaktif Molekül Grafiği")
                    session_state.mw = st.checkbox("Moleküler Ağırlık/Çözünürlük Grafiği")
                    session_state.pic50 = st.checkbox("pIC50/Moleküler Aktiflik Grafiği")
                    session_state.logp = st.checkbox("Çözünürlük/Moleküler Aktiflik Grafiği")
                    session_state.donors = st.checkbox("Hidrojen Bağı Vericiler/Moleküler Aktiflik Grafiği")
                    session_state.acceptors = st.checkbox("Hidrojen Bağı Alıcılar/Moleküler Aktiflik Grafiği")

                    if session_state.grafik:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**Aktif/İnaktif Molekül Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))

                        #sns.countplot(x='bioactivity_class', data=df_class, edgecolor='black')

                        plt.xlabel('Bioactivity class', fontsize=14, fontweight='bold')
                        plt.ylabel('Frequency', fontsize=14, fontweight='bold')
                        
                        st.pyplot()
                        #st.markdown(get_table_download_link(veri), unsafe_allow_html=True)
                        
                        #Buralara PDF indirici eklenecek

                    if session_state.mw:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**Moleküler Ağırlık/Çözünürlük Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))
                        #sns.scatterplot(x='MW', y='LogP', data=df_class, hue='bioactivity_class', size='pIC50', edgecolor='black', alpha=0.7)

                        plt.xlabel('MW', fontsize=14, fontweight='bold')
                        plt.ylabel('LogP', fontsize=14, fontweight='bold')
                        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)
                        st.pyplot()
                        
                        #Buralara PDF indirici eklenecek
                        st.write("**Mann-Whitney U Test Verisi**:")
                        st.write(mannwhitney("MW"))

                    if session_state.pic50:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**pIC50/Moleküler Aktiflik Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))

                        sns.boxplot(x = 'bioactivity_class', y = 'pIC50', data = df_class)

                        plt.xlabel('Bioactivity class', fontsize=14, fontweight='bold')
                        plt.ylabel('pIC50 value', fontsize=14, fontweight='bold')
                        st.pyplot()
                        #Buralara PDF indirici eklenecek

                        st.write("**Mann-Whitney U Test Verisi**:")
                        st.write(mannwhitney("pIC50"))
                    
                    if session_state.logp:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**Çözünürlük/Moleküler Aktiflik Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))

                        sns.boxplot(x = 'bioactivity_class', y = 'LogP', data = df_class)

                        plt.xlabel('Bioactivity class', fontsize=14, fontweight='bold')
                        plt.ylabel('LogP', fontsize=14, fontweight='bold')
                        st.pyplot()
                        #Buralara PDF indirici eklenecek

                        st.write("**Mann-Whitney U Test Verisi**:")
                        st.write(mannwhitney("LogP"))
                    
                    if session_state.donors:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**Hidrojen Bağı Vericiler/Moleküler Aktiflik Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))

                        sns.boxplot(x = 'bioactivity_class', y = 'NumHDonors', data = df_class)

                        plt.xlabel('Bioactivity class', fontsize=14, fontweight='bold')
                        plt.ylabel('NumHDonors', fontsize=14, fontweight='bold')
                        st.pyplot()
                        #Buralara PDF indirici eklenecek

                        st.write("**Mann-Whitney U Test Verisi**:")
                        st.write(mannwhitney("NumHDonors"))

                    if session_state.acceptors:
                        st.write("**********************************")
                        st.text("")
                        st.subheader("**Hidrojen Bağı Alıcılar/Moleküler Aktiflik Grafiği**")

                        plt.figure(figsize=(5.5, 5.5))

                        sns.boxplot(x = 'bioactivity_class', y = 'NumHAcceptors', data = df_class)

                        plt.xlabel('Bioactivity class', fontsize=14, fontweight='bold')
                        plt.ylabel('NumHAcceptors', fontsize=14, fontweight='bold')
                        st.pyplot()
                        #Buralara PDF indirici eklenecek

                        st.write("**Mann-Whitney U Test Verisi**:")
                        st.write(mannwhitney("NumHAcceptors"))


                
            
            

    elif select_box == "Protein Çözünürlüğü":
        pass
def main():
    """Render the 'Simple Bioinformatics App' Streamlit page.

    The sidebar offers two activities:

    * "DNA Sequence": upload one FASTA file, then show its description or
      sequence, nucleotide frequencies (optionally as a bar chart), GC/AT
      content, a count of a user-entered nucleotide, and a set of
      protein-synthesis views (transcription, translation, complement,
      amino-acid frequencies and names).
    * "Dot Plot": upload two FASTA files and draw a dot plot of the first
      N nucleotides of each.

    The function only drives Streamlit widgets; it returns nothing.
    """
    st.title('Simple Bioinformatics App')
    menu = ["DNA Sequence", "Dot Plot"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")
        seq_file = st.file_uploader("Upload FASTA File", type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq
            desc = dna_record.description
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(desc)
            elif details == "Sequence":
                st.write(dna_seq)

            # Nucleotide Frequencies
            st.subheader("Nucleotide Frequencies")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            thymine_color = st.beta_color_picker("Thymine Color")
            cytosil_color = st.beta_color_picker("Cytosil Color")

            if st.button("Plot Freq"):
                # Counter keeps keys in first-seen order, so bar positions do
                # not correspond to fixed nucleotides (and there may be fewer
                # or more than four bars). Colour each bar by its own label
                # instead of by index.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                nucleotides = list(dna_freq.keys())
                barlist = plt.bar(nucleotides, list(dna_freq.values()))
                for nucleotide, bar in zip(nucleotides, barlist):
                    color = nucleotide_colors.get(str(nucleotide).upper())
                    if color is not None:
                        bar.set_color(color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content ": gc_score, "AT Content ": at_score})

            # Nucleotide Count
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} nucleotide is : {} ".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            st.subheader("Protein Synthesis")
            # NOTE: this is an if/elif chain, so the checkboxes after the
            # first ticked one are not rendered on that run.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())
            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)
            elif st.checkbox("AA Plot Frequency"):
                plt.bar(aa_freq.keys(), aa_freq.values())
                st.pyplot()
            elif st.checkbox("Full Amino Acid Name"):
                # Drop stop-codon markers before converting to 3-letter codes.
                aa_name = str(p1).replace("*", "")
                st.write(aa_name)
                st.write("--------------------------")
                st.write(utils.convert_1to3(aa_name))

    elif choice == "Dot Plot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file = st.file_uploader("Upload 1st FASTA File",
                                    type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads are required; test each one explicitly rather than
        # relying on `a and b is not None`, which only None-checks `b`.
        if seq_file is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")

            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            desc1 = dna_record1.description
            desc2 = dna_record2.description

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(desc1)
                st.write("----------")
                st.write(desc2)
            elif details == "Sequence":
                st.write(dna_seq1)
                st.write("----------")
                st.write(dna_seq2)

            custom_limit = st.number_input("Select max number of Nucleotide ",
                                           10, 200, 25)
            if st.button("Dot Plot"):
                st.write("Comparing the first {} Nucleotide of Two Sequences ".
                         format(custom_limit))
                dotplotx(dna_seq1[0:custom_limit], dna_seq2[0:custom_limit])
                st.pyplot()
Example #5
0
def main():
    """Bioinformatics Genome analysis web app.

    Renders a Streamlit page with four sidebar options:

    * "Introduction": a welcome header.
    * "DNA sequence Analysis": upload one FASTA file, then show its
      description or sequence, nucleotide frequencies (optionally as a bar
      chart), GC/AT content, and protein-synthesis views.
    * "Dotplot Analysis": upload two FASTA files and draw a dot plot of the
      first N nucleotides of each.
    * "About us": a header only.

    The function only drives Streamlit widgets; it returns nothing.
    """

    st.title(
        "DNA Genome analysis and Cosine Similarity Analysis web application")
    menu = [
        "Introduction", "DNA sequence Analysis", "Dotplot Analysis", "About us"
    ]
    choice = st.sidebar.selectbox("Select Option", menu)

    if choice == "Introduction":
        st.subheader("Welcome to our Sequence Analysis Application :)")
    elif choice == "DNA sequence Analysis":
        st.subheader("DNA sequence Analysis will be done here.")
        seq_file = st.file_uploader(
            "Upload the .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio(
                "Details of the DNA as provided by NCBI database:",
                ("DNA Record description", "Sequence"))
            if details == "DNA Record description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequencies
            st.subheader("Nucleotide Frequency :")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Toggle the Adenine Colour ")
            guanine_color = st.beta_color_picker("Toggle the Guanine Colour ")
            thymine_color = st.beta_color_picker("Toggle the Thymine Colour ")
            cytosine_color = st.beta_color_picker(
                "Toggle the Cytosine Colour ")

            if st.button("Plot frequency"):
                # Counter keeps keys in first-seen order, so bar positions do
                # not correspond to fixed nucleotides (and there may be fewer
                # or more than four bars). Colour each bar by its own label
                # instead of by index.
                nucleotide_colors = {
                    "A": adenine_color,
                    "G": guanine_color,
                    "T": thymine_color,
                    "C": cytosine_color,
                }
                nucleotides = list(dna_freq.keys())
                barlist = plt.bar(nucleotides, list(dna_freq.values()))
                for nucleotide, bar in zip(nucleotides, barlist):
                    color = nucleotide_colors.get(str(nucleotide).upper())
                    if color is not None:
                        bar.set_color(color)
                st.pyplot()

            st.subheader("DNA complete Composition")

            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({
                "GC Content(for heat stability)": gc_score,
                "AT Content": at_score
            })

            # Protein synthesis
            st.subheader("Protein Synthesis operations on the DNA :")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))
            # NOTE: this is an if/elif chain, so the checkboxes after the
            # first ticked one are not rendered on that run.
            if st.checkbox("Transcription :"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation :"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement :"):
                st.write(dna_seq.complement())
            elif st.checkbox("Amino Acid frequency :"):
                st.write(aa_freq)

            elif st.checkbox("Plot the Amino Acid frequency :"):
                aa_color = st.beta_color_picker("Pick the Amino acid color:")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("The complete Amino acid name is given as"):
                # Drop stop-codon markers before converting to 3-letter codes.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("========================")
                st.write(aa3)

                st.write("========================")
                st.write(utils.get_acid_name(aa3))

            # TODO: top most common amino acids (not implemented yet).

    elif choice == "Dotplot Analysis":
        st.subheader(
            "Generate Dotplot for the comparision between two DNA sequences here."
        )
        seq_file1 = st.file_uploader(
            "Upload the first .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])
        seq_file2 = st.file_uploader(
            "Upload the second .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])

        # Both uploads are required; test each one explicitly rather than
        # relying on `a and b is not None`, which only None-checks `b`.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio(
                "Details of the DNA as provided by NCBI database:",
                ("Record details from the NCBI database", "Gene Sequence"))
            if details == "Record details from the NCBI database":
                st.write(dna_record1.description)
                st.write("===And the other Record is decribed as :===")
                st.write(dna_record2.description)

            elif details == "Gene Sequence":
                st.write(dna_record1.seq)
                st.write("===And the other sequence can be given as: ===")
                st.write(dna_record2.seq)

            display_limit = st.number_input(
                "Select maximum number of Nucleotides", 10, 200, 50)
            if st.button("Push here for Dotplot :)"):
                st.write(
                    "Comparing the first {} nucleotide of the two sequences".
                    format(display_limit))
                dotplotx(dna_seq1[0:display_limit], dna_seq2[0:display_limit])
                st.pyplot()

    elif choice == "About us":
        st.subheader("About the application and about us :)")
Example #6
0
def main():
    """A Bioinformatics App used to visualize recombination sites.

    Renders a Streamlit page with four sidebar activities:

    * "Intro": a header only.
    * "DNA Sequence": upload one FASTA file, then show the record, its
      description or sequence, nucleotide frequencies (optionally as a bar
      chart), GC/AT content, a count of a user-entered nucleotide, and
      protein-synthesis views.
    * "Dot Plot": upload two FASTA files and draw a dot plot of the first
      N nucleotides of each.
    * "About": a header only.

    The function only drives Streamlit widgets; it returns nothing.
    """
    st.title(
        "RSarbiter: a machine learning supported expert in prediction and analysis of recombination spots in S. cerevisiae"
    )
    menu = ["Intro", "DNA Sequence", "Dot Plot", "About"]
    choice = st.sidebar.selectbox("Select Activity", menu)
    if choice == "Intro":
        st.subheader("Intro to BioInformatics")
    elif choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")
        seq_file = st.file_uploader("Upload FASTA File",
                                    type=["fasta", "fa", "txt"])
        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq
            st.write(dna_record)
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            else:
                st.write(dna_record.seq)
            # Nucleotide Frequency
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("thymine Color")
            cytosine_color = st.beta_color_picker("cytosine Color")
            guanine_color = st.beta_color_picker("guanine Color")
            if st.button("Plot Freq"):
                # Counter keeps keys in first-seen order, so bar positions do
                # not correspond to fixed nucleotides (and there may be fewer
                # or more than four bars). Colour each bar by its own label
                # instead of by index.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosine_color,
                }
                nucleotides = list(dna_freq.keys())
                barlist = plt.bar(nucleotides, list(dna_freq.values()))
                for nucleotide, bar in zip(nucleotides, barlist):
                    color = nucleotide_colors.get(str(nucleotide).upper())
                    if color is not None:
                        bar.set_color(color)
                st.pyplot()
            st.subheader("DNA composition")
            gc_content = utils.gc_content(dna_seq)
            at_content = utils.at_content(dna_seq)
            # The dict is shown with st.write here (st.json would also work).
            st.write({
                "GC Content": gc_content,
                "AT Content": at_content
            })

            # Nucleotide Count
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is {}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))
            # NOTE: this is an if/elif chain, so the checkboxes after the
            # first ticked one are not rendered on that run.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())
            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)
            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()
            elif st.checkbox("Full Amino Acid Name"):
                # Drop stop-codon markers before converting to 3-letter codes.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("======================")
                st.write(aa3)
                st.write("======================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "Dot Plot":
        st.subheader("Generate a Dot Plot For Two Sequences")

        seq_file_1 = st.file_uploader("Upload 1st FASTA File",
                                      type=["fasta", "fa", "txt"])
        seq_file_2 = st.file_uploader("Upload 2nd FASTA File",
                                      type=["fasta", "fa", "txt"])

        # Both uploads are required; test each one explicitly rather than
        # relying on `a and b is not None`, which only None-checks `b`.
        if seq_file_1 is not None and seq_file_2 is not None:
            dna_record_1 = SeqIO.read(seq_file_1, "fasta")
            dna_record_2 = SeqIO.read(seq_file_2, "fasta")
            dna_seq_1 = dna_record_1.seq
            dna_seq_2 = dna_record_2.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record_1.description)
                st.write("======================")
                st.write(dna_record_2.description)
            elif details == "Sequence":
                st.write(dna_record_1.seq)
                st.write("==============")
                st.write(dna_record_2.seq)
            cus_limit = st.number_input("Select Max Number of Nucleotide", 10,
                                        200, 50)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing The First {} Nucleotide of The Two Sequence".
                    format(cus_limit))
                dotplotx(dna_seq_1[0:cus_limit], dna_seq_2[0:cus_limit])
                st.pyplot()

    elif choice == "About":
        st.subheader("About")
Example #7
0
def main():
    """Render the Trisakti Bioinformatics Streamlit application.

    Draws the logo and the two titles, then shows the page selected in the
    sidebar: "Intro", "About", "DNA Sequence" or "DotPlot".

    All text is emitted with explicit ``st.write`` calls, so the output does
    not depend on Streamlit's "magic" rendering of bare expressions (which is
    only applied to the main script and can be switched off).
    """
    image = Image.open('b2.png')
    st.image(image, width=200)
    st.title("Trisakti Bioinformatics Application")
    st.title("Powered by Python")

    # Page label -> renderer; dict order is the order shown in the sidebar.
    pages = {
        "Intro": _show_intro_page,
        "About": _show_about_page,
        "DNA Sequence": _show_dna_sequence_page,
        "DotPlot": _show_dotplot_page,
    }
    menu = list(pages)
    choice = st.sidebar.selectbox("Select Activity", menu)
    pages[choice]()


def _bar_colors(symbols, palette, fallback="C0"):
    """Return one colour per symbol, looked up by letter instead of position.

    Symbols missing from ``palette`` (e.g. an ambiguous ``N``) get
    ``fallback``, Matplotlib's first colour-cycle entry.
    """
    return [palette.get(str(symbol).upper(), fallback) for symbol in symbols]


def _plot_frequency(freq, color=None):
    """Draw ``freq`` (a symbol -> count mapping) as a bar chart in the app.

    A dedicated figure is created, handed to ``st.pyplot`` and closed again,
    so nothing accumulates on pyplot's global figure between reruns.
    """
    symbols = list(freq.keys())
    counts = list(freq.values())
    fig, ax = plt.subplots()
    if color is None:
        ax.bar(symbols, counts)
    else:
        ax.bar(symbols, counts, color=color)
    st.pyplot(fig)
    plt.close(fig)


def _show_intro_page():
    """Show the introduction: bioinformatics, DNA, amino acids, Biopython."""
    st.subheader("Intro to BioInformatics")
    image = Image.open('dna.png')
    st.image(image, width=800)

    st.subheader("Bioinformatics")
    st.write(
        "Bioinformatika merupakan cabang ilmu dari biologi yang mengkombinasikan penggunaan komputerisasi dengan karakterisik molekuler biologi. Kombinasi ini disebabkan karena perkembangan teknologi informasi yang sangat pesat, sehingga memudahkan untuk dilakukannya penelitian, serta dapat memberikan informasi yang akurat berdasarkan pengelolaan data. Bioinformatika mempelajari interpretasi biologis data, dan evolusi berbagai bentuk kehidupan dari pendekatan komputasi."
    )

    st.subheader("DNA")
    st.write(
        "DNA adalah singkatan dari asam deoksiribonukleat, yang merupakan molekul yang menyimpan informasi genetik utama dalam sel. Nukleotida terdiri dari tiga bagian: gugus fosfat, gula pentosa (gula ribosa), dan basa. Basisnya terdiri dari empat jenis: adenin (A), guanin (G), sitosin (C), dan timin (T). A dan G adalah purin dengan dua cincin menyatu. C dan T adalah pirimidin dengan satu cincin tunggal. Selain DNA, ada jenis nukleotida lain yang disebut RNA atau asam ribonukleat."
    )

    st.subheader("Protein/Amino Acid")
    st.write(
        "Asam amino adalah senyawa organik yang memiliki gugus fungsional karboksil (-COOH) dan amina (biasanya -NH2). Dalam biokimia sering kali pengertiannya dipersempit: keduanya terikat pada satu atom karbon (C) yang sama (disebut atom C alfa atau α). Gugus karboksil memberikan sifat asam dan gugus amina memberikan sifat basa. Dalam bentuk larutan, asam amino bersifat amfoterik: cenderung menjadi asam pada larutan basa dan menjadi basa pada larutan asam."
    )

    st.subheader("Biopython")
    st.write(
        "Biopython adalah seperangkat alat yang tersedia secara gratis untuk komputasi biologis yang ditulis dengan Python oleh tim pengembang internasional. Aplikasi ini dibuat dan dikembangkan dengan bahasa pemrograman python yang mana menggunakan library biopython untuk proses eksplorasi dan ekstraksi data. Dalam eksplorasi dan ekstraksi data, biopython dapat melakukan proses comparing sequences DNA dan transtlation Nucleotide DNA ke Amino Acid penyusun protein. Berikut ini merupakan tabel Codon transtlation dari Nucleotide ke Amino Acid."
    )
    image = Image.open('protein.png')
    st.image(image, width=800)


def _show_about_page():
    """Show the background text on coronavirus and the purpose of the app."""
    st.subheader("Sejarah Awal Coronavirus")
    st.write(
        "Coronavirus pertama kali ditemukan pada pertengahan tahun 1960 dengan jenis HCoV-229E. Virus ini bermutasi selama 56 tahun sampai pada tahun 2020 tercatat ada tujuh dari banyaknya jenis spesies virus corona yang menginfeksi manusia muali dari Alpha Coronavirus, Beta Coronavirus, SARS, dan juga MERS. Evolusi dari jenis spesies virus corona dapat terlihat pada gambar di bawah ini."
    )
    image = Image.open('mutasi.PNG')
    st.image(image, width=800)

    st.subheader("Corona Virus Disease 2019")
    st.write(
        "Pandemi koronavirus 2019 (bahasa Inggris: coronavirus disease 2019, disingkat COVID-19) adalah penyakit menular yang disebabkan oleh SARS-CoV-2, salah satu jenis koronavirus. Penyakit ini mengakibatkan pandemi koronavirus 2019–2020.Penderita COVID-19 dapat mengalami demam, batuk kering, dan kesulitan bernapas.Sakit tenggorokan, pilek, atau bersin-bersin lebih jarang ditemukan.Pada penderita yang paling rentan, penyakit ini dapat berujung pada pneumonia dan kegagalan multiorgan.Infeksi menyebar dari satu orang ke orang lain melalui percikan (droplet) dari saluran pernapasan yang sering dihasilkan saat batuk atau bersin. Waktu dari paparan virus hingga timbulnya gejala klinis berkisar antara 1–14 hari dengan rata-rata 5 hari. Metode standar diagnosis adalah uji reaksi berantai polimerase transkripsi-balik (rRT-PCR) dari usap nasofaring atau sampel dahak dengan hasil dalam beberapa jam hingga 2 hari. Pemeriksaan antibodi dari sampel serum darah juga dapat digunakan dengan hasil dalam beberapa hari. Infeksi juga dapat didiagnosis dari kombinasi gejala, faktor risiko, dan pemindaian tomografi terkomputasi pada dada yang menunjukkan gejala pneumonia."
    )

    st.subheader("Tujuan Pembuatan Aplikasi")
    st.write(
        "Tujuan pembuatan dan pengembangan aplikasi ini adalah agar dapat membantu para peneliti dalam menganalisis informasi yang ada pada data DNA dengan bentuk visualisasi diagram plot. Juga hasil informasi dari ekstraksi data DNA menghasilkan pola/patern transtlation protein dari sample DNA Coronavirus. Aplikasi ini dapat memberi hasil persentase similaritas sequencing alignment DNA, sehingga terlihat adanya mutasi genetik yang terjadi."
    )

    st.subheader(
        "Website ini dalam tahap pengembangan & digunakan untuk project penelitian."
    )
    st.subheader("contact : hafiz065001600009.trisakti.ac.id")


def _show_dna_sequence_page():
    """Analyse a single uploaded FASTA record.

    Shows the record's description or sequence, nucleotide frequencies (with
    an optional coloured bar chart), GC/AT content, the count of a
    user-entered substring, and a set of protein-synthesis views.
    """
    st.subheader("DNA Sequence Analysis")

    seq_file = st.file_uploader("Upload FASTA File",
                                type=["fasta", "fa", "txt"])
    if seq_file is None:
        return

    dna_record = SeqIO.read(seq_file, "fasta")
    dna_seq = dna_record.seq

    details = st.radio("Details", ("Description", "Sequence"))
    if details == "Description":
        st.write(dna_record.description)
    elif details == "Sequence":
        st.write(dna_record.seq)

    # Nucleotide frequency
    st.subheader("Nucleotide Frequency")
    dna_freq = Counter(dna_seq)
    st.write(dna_freq)
    # Keyed by base letter so each picker colours its own bar regardless of
    # the order in which the bases first occur in the sequence.
    palette = {
        "A": st.beta_color_picker("Adenine Color"),
        "T": st.beta_color_picker("Thymine Color"),
        "G": st.beta_color_picker("Guanine Color"),
        "C": st.beta_color_picker("Cytosil Color"),
    }

    if st.button("Plot Frequency"):
        _plot_frequency(dna_freq, color=_bar_colors(dna_freq.keys(), palette))

    st.subheader("DNA Composition")
    gc_score = utils.gc_content(str(dna_seq))
    at_score = utils.at_content(str(dna_seq))
    st.json({"GC Content": gc_score, "AT Content": at_score})

    # Count occurrences of the text typed by the user in the sequence.
    nt_count = st.text_input("Enter Nucleotide", "Type Nucleotide Alphabet")
    st.write("Number of {} Nucleotide is ::{}".format(
        nt_count, str(dna_seq).count(nt_count)))

    # Protein synthesis
    st.subheader("Protein Synthesis")
    p1 = dna_seq.translate()
    aa_freq = Counter(str(p1))

    # The elif chain means only the first ticked checkbox is honoured, and the
    # checkboxes after it are not created during that run.
    if st.checkbox("Transcription"):
        st.write(dna_seq.transcribe())

    elif st.checkbox("Translation"):
        st.write(p1)

    elif st.checkbox("Complement"):
        st.write(dna_seq.complement())

    elif st.checkbox("Amino Acid Frequency"):
        st.write(aa_freq)

    elif st.checkbox("Plot Amino Acid Frequency"):
        aa_color = st.beta_color_picker("Pick An Amino Acid Color")
        _plot_frequency(aa_freq, color=aa_color)

    elif st.checkbox("Full Amino Acid Name"):
        # Stop codons ('*') are dropped before converting to 3-letter codes.
        aa_name = str(p1).replace("*", "")
        aa3 = utils.convert_1to3(aa_name)
        st.write(aa_name)
        st.write("=========================")
        st.write(aa3)

        st.write("=========================")
        st.write(utils.get_acid_name(aa3))


def _show_dotplot_page():
    """Compare two uploaded FASTA records.

    Offers per-record detail views (description with GC/AT content, sequence,
    nucleotide and amino-acid frequencies as tables or bar charts), a dot plot
    of the first ``cus_limit`` nucleotides, and a similarity percentage based
    on a global alignment score of those prefixes.
    """
    st.subheader("Generate Dot Plot For Two Sequences")
    seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                 type=["fasta", "fa"])
    seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                 type=["fasta", "fa"])

    # Both uploads are required before anything else is shown.
    if seq_file1 is None or seq_file2 is None:
        return

    dna_record1 = SeqIO.read(seq_file1, "fasta")
    dna_record2 = SeqIO.read(seq_file2, "fasta")
    dna_seq1 = dna_record1.seq
    dna_seq2 = dna_record2.seq

    details = st.radio(
        "Details",
        ("Description", "Sequence", "Nucleotide Frequency",
         "Nucleotide Plot Frequency", "Amino Acid Frequency",
         "Amino Acid Plot Frequency"))

    # Frequencies and translations are computed only by the view that needs
    # them, so the other views do not pay for (or fail on) that work.
    if details == "Description":
        st.write(dna_record1.description)
        st.write("=====================")
        st.write(dna_record2.description)
        st.subheader("DNA Composition")
        gc_score1 = utils.gc_content(str(dna_seq1))
        at_score1 = utils.at_content(str(dna_seq1))
        st.json({"GC Content": gc_score1, "AT Content": at_score1})
        gc_score2 = utils.gc_content(str(dna_seq2))
        at_score2 = utils.at_content(str(dna_seq2))
        st.json({"GC Content": gc_score2, "AT Content": at_score2})
    elif details == "Sequence":
        st.write(dna_record1.seq)
        st.write(
            "========================================================================="
        )
        st.write(dna_record2.seq)
    elif details == "Nucleotide Frequency":
        st.write(Counter(dna_seq1))
        st.write("=====================")
        st.write(Counter(dna_seq2))
    elif details == "Nucleotide Plot Frequency":
        _plot_frequency(Counter(dna_seq1))
        st.write(
            "=========================================================================="
        )
        _plot_frequency(Counter(dna_seq2))
    elif details == "Amino Acid Frequency":
        st.write(Counter(str(dna_seq1.translate())))
        st.write("=====================")
        st.write(Counter(str(dna_seq2.translate())))
    elif details == "Amino Acid Plot Frequency":
        _plot_frequency(Counter(str(dna_seq1.translate())))
        st.write(
            "=========================================================================="
        )
        _plot_frequency(Counter(str(dna_seq2.translate())))

    cus_limit = st.number_input(
        "Select Max number of Nucleotide (Minimum 100)", 100, 40000, 10000)
    prefix1 = dna_seq1[0:cus_limit]
    prefix2 = dna_seq2[0:cus_limit]

    # The "Similarity" button is only created on runs where "Dot Plot" was not
    # the button just pressed (elif chain).
    if st.button("Dot Plot"):
        st.write(
            "Comparing the first {} Nucleotide of the Two Sequences".format(
                cus_limit))
        dotplotx(prefix1, prefix2)
        # NOTE(review): presumably dotplotx draws on pyplot's current figure,
        # which the argument-less st.pyplot() then renders - confirm.
        st.pyplot()
    elif st.button("Similarity"):
        st.write(
            "Similarity of Comparing the first {} Nucleotide of the Two Sequences"
            .format(cus_limit))
        if len(prefix1) == 0:
            # Avoid dividing by zero when the first record has no sequence.
            st.warning("Cannot compute similarity: the first sequence is empty.")
            return
        score = pairwise2.align.globalxx(prefix1,
                                         prefix2,
                                         one_alignment_only=True,
                                         score_only=True)
        # Alignment score as a percentage of the first prefix's length.
        st.write(score / len(prefix1) * 100)
Example #8
0
def main():
    """Render a simple Streamlit bioinformatics app.

    The sidebar selects one of four activities:

    * ``Intro`` - a heading only.
    * ``DNA Sequence`` - analyse one uploaded FASTA record: description or
      sequence, nucleotide frequencies (optionally as a coloured bar chart),
      GC/AT content, a substring count and protein-synthesis views.
    * ``DotPlot`` - upload two FASTA records and draw a dot plot of their
      first ``custom_limit`` nucleotides.
    * ``About`` - a heading.
    """
    st.title("Simple Bioinformatics App")
    menu = ['Intro', 'DNA Sequence', 'DotPlot', 'About']

    choice = st.sidebar.selectbox('Select Activity', menu)
    if choice == 'Intro':
        st.subheader('Intro to BioInformatics')
    elif choice == 'DNA Sequence':
        st.subheader('DNA Sequence Analysis')
        seq_file = st.file_uploader('Upload FASTA file',
                                    type=['fasta', 'fa', 'txt', 'fna'])

        if seq_file is not None:
            # NOTE(review): the file uploader may no longer autodetect the
            # file's encoding and return binary buffers; if SeqIO needs text,
            # wrap the upload first - confirm for the Streamlit version used:
            # seq_file_io = io.TextIOWrapper(seq_file)
            dna_record = SeqIO.read(seq_file, 'fasta')
            dna_seq = dna_record.seq

            details = st.radio('Details', ('Description', 'Sequence'))
            if details == 'Description':
                st.write(dna_record.description)
            elif details == 'Sequence':
                st.write(dna_seq)

            # nucleotide frequencies
            st.subheader('Nucleotide Frequency')
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            # Keyed by base letter so each picker colours its own bar no
            # matter in which order the bases first occur in the sequence.
            palette = {
                'A': st.beta_color_picker('Adenine Colour'),
                'G': st.beta_color_picker('Guanine Colour'),
                'C': st.beta_color_picker('Cytosine Colour'),
                'T': st.beta_color_picker('Thymine Colour'),
            }

            if st.button('Plot Frequency'):
                bases = list(dna_freq.keys())
                # Symbols without a picker (e.g. 'N') keep Matplotlib's first
                # colour-cycle entry instead of raising or being mis-coloured.
                bar_colours = [palette.get(str(base).upper(), 'C0')
                               for base in bases]
                fig, ax = plt.subplots()
                ax.bar(bases, list(dna_freq.values()), color=bar_colours)
                st.pyplot(fig)
                # Release the figure so reruns do not pile up open figures.
                plt.close(fig)

            st.subheader('DNA Composition')
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            composition = {'GC Content': gc_score, 'AT Content': at_score}
            st.write(composition)
            st.json(composition)

            # nucleotide count: occurrences of the typed text in the sequence
            nt_count = st.text_input('Enter Nucleotide Here',
                                     'Type Nucleotide Alphabet')
            st.write('Number of {} Nucleotide is ::{}'.format(
                nt_count, str(dna_seq).count(nt_count)))

            # protein synthesis
            st.subheader('Protein Synthesis')
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # The elif chain means only the first ticked checkbox is honoured,
            # and the checkboxes after it are not created during that run.
            if st.checkbox('Transcription'):
                st.write(dna_seq.transcribe())

            elif st.checkbox('Translate'):
                st.write(p1)

            elif st.checkbox('Complement'):
                st.write(dna_seq.complement())

            elif st.checkbox('AA Frequency'):
                st.write(aa_freq)

            # amino-acid frequencies as a single-colour bar chart
            elif st.checkbox('Plot AA Frequency'):
                fig, ax = plt.subplots()
                aa_colour = st.beta_color_picker('Pick an amino acid colour')
                ax.bar(aa_freq.keys(), aa_freq.values(), color=aa_colour)
                st.pyplot(fig)
                plt.close(fig)

            elif st.checkbox('Full amino acid name'):
                # Stop codons ('*') are dropped before the 1->3 conversion.
                aa_name = str(p1).replace('*', '')
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write('='*30)
                st.write(aa3)

                st.write('='*30)
                st.write(utils.get_acid_name(aa3))

    elif choice == 'DotPlot':
        st.subheader('Generate Dot Plot for two Sequences')

        seq_file1 = st.file_uploader('Upload 1st FASTA file',
                                     type=['fasta', 'fa', 'txt', 'fna'])

        seq_file2 = st.file_uploader('Upload 2nd FASTA file',
                                     type=['fasta', 'fa', 'txt', 'fna'])
        # Both uploads are required before anything else is shown.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, 'fasta')
            dna_record2 = SeqIO.read(seq_file2, 'fasta')
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio('Details', ('Description', 'Sequence'))
            if details == 'Description':
                st.write(dna_record1.description)
                st.write('='*50)
                st.write(dna_record2.description)
            elif details == 'Sequence':
                st.write(dna_record1.seq)
                st.write('='*50)
                st.write(dna_record2.seq)

            # (label, min_value, max_value, value)
            custom_limit = st.number_input('Select max number of nucleotides',
                                           10, 400, 50)
            if st.button('Dot Plot'):
                seq1 = join_str(dna_seq1, custom_limit)
                seq2 = join_str(dna_seq2, custom_limit)
                numeric_arr1 = assign_numeric_array(seq1)
                numeric_arr2 = assign_numeric_array(seq2)
                compr_array = generate_comparison_array(numeric_arr1,
                                                        numeric_arr2)
                fig, ax = plot_dotplot(compr_array, seq1, seq2)
                st.write('Comparing the first {} nucleotides of the two sequences'.format(custom_limit))
                st.pyplot(fig)


    elif choice == 'About':
        st.subheader('About')