def linear_seqs_division(seqs, ge, s):
    """Split ``seqs[0]`` at its midpoint and ``seqs[1]`` at each best position.

    Args:
        seqs: Sequences to be matched; ``seqs[0]`` and ``seqs[1]`` are split.
        ge: Gap extension penalty.
        s: Substitution matrix, or a pair ``[x, y]`` where ``x`` is the cost
            of matching two equal symbols and ``y`` the cost of matching two
            different ones.

    Returns:
        A list with one entry for every index at which the result of
        ``fill_half`` attains its maximum.  Each entry is a tuple
        ``([head_0, head_1], [tail_0_reversed, tail_1_reversed])``: the two
        leading parts are kept in order, the two trailing parts are reversed.
    """
    score_matrix = init_matrix_H([len(seq) + 1 for seq in seqs], ge)
    match_cost = create_matching_function(s, ge)
    # NOTE(review): the `.item()` call below only works if fill_half returns
    # a one-dimensional array -- confirm against its definition.
    half_scores = fill_half(score_matrix, seqs, ge, match_cost)
    middle = len(seqs[0]) // 2
    best_positions = np.argwhere(half_scores == np.max(half_scores))
    return [
        (
            [seqs[0][0:middle], seqs[1][0:cut]],
            [seqs[0][middle:][::-1], seqs[1][cut:][::-1]],
        )
        for cut in (position.item() for position in best_positions)
    ]
def multidimesional_N_W_algoritm(seqs, g, s, max_mathing):
    """Fill the full score matrix for ``seqs`` and collect the matchings.

    Args:
        seqs: Sequences to be matched.
        g: Gap extension penalty.
        s: Substitution matrix, or a pair ``[x, y]`` where ``x`` is the cost
            of matching two equal symbols and ``y`` the cost of matching two
            different ones.
        max_mathing: Maximal number of matchings that are returned.

    Returns:
        A pair ``(score, matches)``: ``score`` is the matrix entry indexed by
        the full length of every sequence, ``matches`` is whatever
        ``matching`` produces for the filled matrix.
    """
    dimensions = [len(seq) + 1 for seq in seqs]
    score_matrix = init_matrix(dimensions, g)
    match_cost = create_matching_function(s, g)
    fill_H_all(score_matrix, seqs, match_cost)

    # Read the final score before the matchings are collected, as the
    # original expression did.
    final_cell = tuple(map(len, seqs))
    score = score_matrix[final_cell]
    matches = matching(score_matrix, seqs, match_cost, max_mathing)
    return score, matches
def linear_gap_algorytm(seqs, go, ge, s, max_number_of_matching):
    """Match two sequences using the H, E and F matrices.

    Args:
        seqs: Sequences to be matched; the score is read at
            ``(len(seqs[0]), len(seqs[1]))``.
        go: Presumably the gap opening cost (passed alongside ``ge`` to every
            helper) -- confirm against the helpers' definitions.
        ge: Gap extension cost.
        s: Substitution matrix, or a pair ``[x, y]`` where ``x`` is the cost
            of matching two equal symbols and ``y`` the cost of matching two
            different ones.
        max_number_of_matching: Maximal number of matchings that are returned.

    Returns:
        A pair ``(score, matches)``.
    """
    # Each initialiser receives its own freshly built dimension list, in the
    # order H, E, F.
    score_matrix, e_matrix, f_matrix = (
        initialise([len(seq) + 1 for seq in seqs], go, ge)
        for initialise in (init_matrix_H, init_matrix_E, init_matrix_F)
    )
    match_cost = create_matching_function(s, ge)
    fill_all(score_matrix, e_matrix, f_matrix, seqs, go, ge, match_cost)
    matches = matching(
        score_matrix,
        e_matrix,
        f_matrix,
        seqs,
        match_cost,
        go,
        ge,
        max_number_of_matching,
    )
    final_cell = (len(seqs[0]), len(seqs[1]))
    return (score_matrix[final_cell], matches)
""" #print((len(seqs[0]) + 1) * (len(seqs [1]) + 1)) #print("elo") if((len(seqs[0]) + 1) * (len(seqs [1]) + 1) <= L or len(seqs[0]) < 10): return multidimesional_N_W_algoritm(seqs, ge , s, max_number_of_matching) else: array = linear_seqs_division(seqs, ge , s) maximum = -float("inf") for seqs1, seqs_rev1 in array: score_begin, matches_begin = linear_algorytm(seqs1, ge , s, max_number_of_matching, L) score_end, matches_end = linear_algorytm(seqs_rev1, ge , s, max_number_of_matching, L) if (maximum < score_begin +score_end): maximum = score_begin +score_end max_matches_begin = matches_begin max_matches_end = matches_end return maximum, merge(max_matches_begin, max_matches_end, max_number_of_matching) if __name__ == "__main__": s1= "AAABB" s2= "AAA" seqs = [s1, s2 ] ge = - 1 d = create_matching_function([1,-1] , ge) H = init_matrix_H([len(seq) + 1 for seq in seqs] ,ge) X = linear_algorytm(seqs, ge , [1,-1], 100, 7*2) score = X[0] if(not score == 1): raise AssertionError