Beispiel #1
0
def test_find_subsequence():
    """Check that ``find_subsequence`` reports every start index of a match."""
    haystack = seq.NucleotideSequence("ATACGCTTGCT")
    needle = seq.NucleotideSequence("GCT")
    # "GCT" starts at index 4 and again at index 8 of the main sequence.
    found = list(seq.find_subsequence(haystack, needle))
    assert found == [4, 8]
Beispiel #2
0
# :mod:`biotite.sequence.io.fastq` subpackage.
#
# Alternatively, a sequence can also be loaded from GenBank or GenPept
# files, using the :class:`GenBankFile` class (more on this later).
#
# Sequence search
# ---------------
#
# A sequence can be searched for the position of a subsequence or a
# specific symbol:

import biotite.sequence as seq

# The sequence that is searched and the subsequence to look for.
main_seq = seq.NucleotideSequence("ACCGTATCAAGTATTG")
sub_seq = seq.NucleotideSequence("TAT")

# Run both searches first, then report the results.
subsequence_positions = seq.find_subsequence(main_seq, sub_seq)
symbol_positions = seq.find_symbol(main_seq, "C")
print("Occurences of 'TAT':", subsequence_positions)
print("Occurences of 'C':", symbol_positions)

########################################################################
# Sequence alignments
# -------------------
#
# .. currentmodule:: biotite.sequence.align
#
# When comparing two (or more) sequences, usually an alignment needs
# to be performed. Two kinds of algorithms need to be distinguished
# here:
# Heuristic algorithms do not guarantee to yield the optimal alignment,
# but instead they are very fast. On the other hand, there are
# algorithms that calculate the optimal (maximum similarity score)
# alignment, but are quite slow.
Beispiel #3
0
def _cut_sites(gene, motif, preceding_base=None):
    """Return the start indices of *motif* in *gene* that count as cut sites.

    When *preceding_base* is given, a match is only kept if the symbol
    directly in front of it equals that base.  A match at index 0 has no
    preceding symbol and is therefore rejected (``gene[-1]`` would silently
    wrap around to the last symbol of the sequence).
    """
    positions = list(seq.find_subsequence(gene, seq.NucleotideSequence(motif)))
    if preceding_base is None:
        return positions
    return [i for i in positions if i > 0 and gene[i - 1] == preceding_base]


def _fragment_lengths(cut_positions, total_length):
    """Return the lengths of the pieces obtained by cutting at *cut_positions*.

    *cut_positions* must be in ascending order.  The result covers the whole
    strand: start -> first cut, every cut -> next cut, last cut -> end, so
    the lengths add up to *total_length*.  Without any cut site an empty
    list is returned.
    """
    if not cut_positions:
        return []
    boundaries = [0] + list(cut_positions) + [total_length]
    return [end - start for start, end in zip(boundaries, boundaries[1:])]


def _save_histogram(lengths, filename):
    """Draw a histogram of *lengths* and save it below ``static/images``."""
    # pyplot keeps one implicit "current figure"; clear it so that this image
    # does not also contain the histograms drawn earlier (or by a prior call).
    plt.clf()
    plt.hist(lengths, bins=500, facecolor='blue', alpha=0.5)
    plt.ylabel("Number of strands")
    plt.xlabel('length of strands')
    plt.savefig(os.getcwd() + '/static/images/' + filename, dpi=200)


def RLFP(gene):
    """Plot fragment-length histograms for a TaqI / HpaII digest of *gene*.

    All whitespace is removed from *gene* before it is converted into a
    ``seq.NucleotideSequence``.  Four sets of cut sites are searched, and
    for each set the resulting fragment lengths are written as a histogram
    PNG into ``<current working directory>/static/images``:

    * ``RLFP_Taqi_length.png``     - "CGA" directly preceded by "T"
    * ``RLFP_TaqI_rev_length.png`` - "AGCT"
    * ``RLFP_HpaII_length.png``    - "CGG" directly preceded by "C"
    * ``HpaII_rev_length.png``     - "GGCT"

    Finally ``session['valid_seq']`` is set to ``True``.

    :param gene: nucleotide sequence as a string; may contain whitespace.
    :returns: ``None``; the results are the image files and the session flag.
    """
    gene = ''.join(gene.split())  # removing the spaces
    gene = seq.NucleotideSequence(gene)

    # (output file name, cut positions), in the order the images are written.
    # NOTE(review): "AGCT" is the reverse of "TCGA", but "GGCT" is not the
    # reverse of "CCGG" ("GGCC" would be) - confirm the intended motif.
    digests = (
        ('RLFP_Taqi_length.png', _cut_sites(gene, "CGA", "T")),
        ('RLFP_TaqI_rev_length.png', _cut_sites(gene, "AGCT")),
        ('RLFP_HpaII_length.png', _cut_sites(gene, "CGG", "C")),
        ('HpaII_rev_length.png', _cut_sites(gene, "GGCT")),
    )
    for filename, cut_positions in digests:
        _save_histogram(_fragment_lengths(cut_positions, len(gene)), filename)

    session['valid_seq'] = True