def test_find_subsequence():
    """Check that every occurrence of a subsequence is reported.

    ``"GCT"`` starts at indices 4 and 8 of ``"ATACGCTTGCT"``.
    """
    haystack = seq.NucleotideSequence("ATACGCTTGCT")
    needle = seq.NucleotideSequence("GCT")
    expected_positions = [4, 8]

    found_positions = list(seq.find_subsequence(haystack, needle))

    assert found_positions == expected_positions
# :mod:`biotite.sequence.io.fastq` subpackage.
#
# Alternatively, a sequence can also be loaded from GenBank or GenPept
# files, using the :class:`GenBankFile` class (more on this later).
#
# Sequence search
# ---------------
#
# A sequence can be searched for the positions of a subsequence or of a
# specific symbol:

import biotite.sequence as seq

main_seq = seq.NucleotideSequence("ACCGTATCAAGTATTG")
sub_seq = seq.NucleotideSequence("TAT")
subsequence_hits = seq.find_subsequence(main_seq, sub_seq)
symbol_hits = seq.find_symbol(main_seq, "C")
print("Occurences of 'TAT':", subsequence_hits)
print("Occurences of 'C':", symbol_hits)

########################################################################
# Sequence alignments
# -------------------
#
# .. currentmodule:: biotite.sequence.align
#
# When comparing two (or more) sequences, usually an alignment needs
# to be performed. Two kinds of algorithms need to be distinguished
# here:
# Heuristic algorithms do not guarantee to yield the optimal alignment,
# but in return they are very fast. On the other hand, there are
# algorithms that calculate the optimal (maximum similarity score)
# alignment, but are quite slow.
def _cut_positions(gene, site, upstream_base=None):
    """Return the start indices of ``site`` in ``gene`` as plain ints.

    When ``upstream_base`` is given, a match is kept only if the base
    directly before it equals ``upstream_base``.
    """
    hits = [
        int(pos)
        for pos in seq.find_subsequence(gene, seq.NucleotideSequence(site))
    ]
    if upstream_base is None:
        return hits
    # ``pos > 0`` matters: a match at index 0 has no preceding base, and
    # ``gene[-1]`` would silently wrap around to the last base instead.
    return [
        pos for pos in hits
        if pos > 0 and gene[pos - 1] == upstream_base
    ]


def _fragment_lengths(cuts, total_length):
    """Return the lengths of the pieces produced by cutting at ``cuts``.

    The sequence start and end act as outer boundaries, so ``n`` distinct
    interior cuts give ``n + 1`` fragments whose lengths sum to
    ``total_length``. Zero-length pieces (a cut at position 0 or an empty
    sequence) are dropped because they do not correspond to a strand.
    """
    bounds = [0, *sorted(cuts), total_length]
    return [
        end - start
        for start, end in zip(bounds, bounds[1:])
        if end > start
    ]


def _save_length_histogram(lengths, path):
    """Draw ``lengths`` as a histogram on a fresh figure and save it."""
    # A dedicated figure per image keeps earlier histograms from being
    # drawn into later files; closing it avoids accumulating figures
    # across calls.
    fig = plt.figure()
    try:
        plt.hist(lengths, bins=500, facecolor='blue', alpha=0.5)
        plt.ylabel("Number of strands")
        plt.xlabel('length of strands')
        plt.savefig(path, dpi=200)
    finally:
        plt.close(fig)


def RLFP(gene):
    """Save fragment-length histograms for four cut patterns of ``gene``.

    All whitespace is removed from ``gene`` before it is converted to a
    nucleotide sequence. Four patterns are searched:

    * ``"CGA"`` preceded by ``"T"`` (named TaqI in this code),
    * ``"AGCT"`` (named TaqI reverse),
    * ``"CGG"`` preceded by ``"C"`` (named HpaII),
    * ``"GGCT"`` (named HpaII reverse).

    Each match index is treated as a cut position. The lengths of the
    resulting fragments are plotted as a histogram and written to
    ``<cwd>/static/images/``; that directory is assumed to exist.
    Finally ``session['valid_seq']`` is set to ``True``.

    :param gene: nucleotide sequence as text; may contain whitespace.
    :return: ``None``.
    """
    gene = seq.NucleotideSequence(''.join(gene.split()))
    total_length = len(gene)

    # All searches run before any file is written, so a failure while
    # searching leaves the images on disk untouched.
    histograms = (
        ('RLFP_Taqi_length.png', _cut_positions(gene, "CGA", "T")),
        ('RLFP_TaqI_rev_length.png', _cut_positions(gene, "AGCT")),
        ('RLFP_HpaII_length.png', _cut_positions(gene, "CGG", "C")),
        ('HpaII_rev_length.png', _cut_positions(gene, "GGCT")),
    )

    image_dir = os.path.join(os.getcwd(), 'static', 'images')
    for file_name, cuts in histograms:
        _save_length_histogram(
            _fragment_lengths(cuts, total_length),
            os.path.join(image_dir, file_name),
        )

    session['valid_seq'] = True