def global_align(s, t, matrix, gap):
    '''
    Return the best global alignment score of s and t with a constant gap
    penalty.

    Every maximal run of gaps costs `gap` exactly once, no matter how long
    the run is. Substitution scores come from match_score(matrix, a, b),
    which is defined outside this block.

    NOTE(review): other functions named global_align are defined later in
    this file; the last definition is the one bound at import time.
    '''
    rows, cols = len(s) + 1, len(t) + 1

    # A true minus infinity, so an impossible state can never beat a real
    # score however large the gap penalty is.
    neg_inf = float('-inf')

    # M: best score of any alignment of s[:i] and t[:j].
    M = [[0] * cols for _ in range(rows)]
    # X: best score of an alignment ending with s[i-1] opposite a gap.
    X = [[neg_inf] * cols for _ in range(rows)]
    # Y: best score of an alignment ending with t[j-1] opposite a gap.
    Y = [[neg_inf] * cols for _ in range(rows)]

    # Aligning a prefix against the empty string is a single gap run.
    for i in range(1, rows):
        M[i][0] = gap
    for j in range(1, cols):
        M[0][j] = gap

    for i in range(1, rows):
        for j in range(1, cols):
            # Opening a gap costs `gap`; continuing one is free.
            X[i][j] = max(M[i - 1][j] + gap, X[i - 1][j])
            Y[i][j] = max(M[i][j - 1] + gap, Y[i][j - 1])
            M[i][j] = max(
                M[i - 1][j - 1] + match_score(matrix, s[i - 1], t[j - 1]),
                X[i][j],
                Y[i][j],
            )

    # The score of the full alignment sits in the bottom-right corner.
    return M[-1][-1]
def global_align(s, t, matrix, gap):
    '''
    Return the best global alignment score of s and t with a constant gap
    penalty.

    A run of consecutive gaps is charged `gap` once, regardless of its
    length. Substitution scores come from match_score(matrix, a, b), which
    is defined outside this block.

    NOTE(review): this name is defined several times in this file; later
    definitions replace this one at import time.
    '''
    n, m = len(s), len(t)

    # Use a real minus infinity for unreachable states; a finite stand-in
    # could out-score a legitimate alignment when penalties are large.
    unreachable = float('-inf')

    # M[i][j]: best score for s[:i] vs t[:j], whatever the last column is.
    M = [[0 for _ in range(m + 1)] for _ in range(n + 1)]
    # X[i][j]: best score when the last column pairs s[i-1] with a gap.
    X = [[unreachable for _ in range(m + 1)] for _ in range(n + 1)]
    # Y[i][j]: best score when the last column pairs t[j-1] with a gap.
    Y = [[unreachable for _ in range(m + 1)] for _ in range(n + 1)]

    # A prefix aligned against nothing is one gap run: a single penalty.
    for i in range(1, n + 1):
        M[i][0] = gap
    for j in range(1, m + 1):
        M[0][j] = gap

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Open a new gap (pay `gap`) or lengthen the current one (free).
            X[i][j] = max(M[i - 1][j] + gap, X[i - 1][j])
            Y[i][j] = max(M[i][j - 1] + gap, Y[i][j - 1])
            substitution = (
                M[i - 1][j - 1] + match_score(matrix, s[i - 1], t[j - 1])
            )
            M[i][j] = max(substitution, X[i][j], Y[i][j])

    # Bottom-right cell covers both complete strings.
    return M[n][m]
def local_align_with_affine(s, t, scores, gap, gap_e):
    '''
    Find a best-scoring local alignment of s and t with affine gap penalties.

    Opening a gap costs `gap` and each further gap position costs `gap_e`
    (both are presumably negative - confirm against callers). Substitution
    scores come from match_score(scores, a, b), which is defined outside
    this block.

    Returns a tuple (score, r, u): the best score as a string, and the
    substrings of s and t covered by that alignment. If either input is
    empty the result is ('0', '', '').
    '''
    width = len(t) + 1
    neg_inf = float('-inf')

    # Each traceback cell packs three decisions into one small integer:
    #   bits 0-1: where M[i][j] came from (0 diagonal, 1 gap state X,
    #             2 gap state Y, 3 start of the local alignment),
    #   bit 2:    X[i][j] was an extension of X[i-1][j] (else opened from M),
    #   bit 3:    Y[i][j] was an extension of Y[i][j-1] (else opened from M).
    # The gap bits are required because a gap that was *extended* must be
    # followed through the gap state, not through M's pointer.
    M_MASK = 3
    STOP = 3
    X_EXTENDED = 4
    Y_EXTENDED = 8

    # Only the previous row of scores is needed; the traceback is kept whole.
    prev_m = [0] * width
    # No gap can be extended from outside the matrix.
    prev_x = [neg_inf] * width
    trace = [[0] * width for _ in range(len(s) + 1)]

    # -1 means "no cell scored yet"; real cell scores are never below 0.
    best = -1
    best_pos = (0, 0)

    for i in range(1, len(s) + 1):
        cur_m = [0] * width
        cur_x = [neg_inf] * width
        cur_y = [neg_inf] * width
        for j in range(1, width):
            flags = 0

            # Gap opposite s[i-1]: open from M or extend the gap above.
            open_x = prev_m[j] + gap
            extend_x = prev_x[j] + gap_e
            if extend_x > open_x:
                cur_x[j] = extend_x
                flags |= X_EXTENDED
            else:
                cur_x[j] = open_x

            # Gap opposite t[j-1]: open from M or extend the gap to the left.
            open_y = cur_m[j - 1] + gap
            extend_y = cur_y[j - 1] + gap_e
            if extend_y > open_y:
                cur_y[j] = extend_y
                flags |= Y_EXTENDED
            else:
                cur_y[j] = open_y

            costM = [
                prev_m[j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                cur_x[j],
                cur_y[j],
                0,
            ]
            cur_m[j] = max(costM)
            trace[i][j] = costM.index(cur_m[j]) | flags

            if cur_m[j] > best:
                best = cur_m[j]
                best_pos = (i, j)
        prev_m, prev_x = cur_m, cur_x

    # With an empty input no cell was scored; the empty alignment scores 0.
    if best < 0:
        best = 0

    # Walk back from the best cell, tracking which state we are in.
    IN_M, IN_X, IN_Y = 0, 1, 2
    end_i, end_j = best_pos
    i, j = best_pos
    state = IN_M
    while i > 0 and j > 0:
        cell = trace[i][j]
        if state == IN_M:
            pointer = cell & M_MASK
            if pointer == STOP:
                break
            if pointer == 0:
                i -= 1
                j -= 1
            elif pointer == 1:
                state = IN_X
            else:
                state = IN_Y
        elif state == IN_X:
            # Consume s[i-1] against a gap; leave the gap if it opened here.
            if not cell & X_EXTENDED:
                state = IN_M
            i -= 1
        else:
            # Consume t[j-1] against a gap; leave the gap if it opened here.
            if not cell & Y_EXTENDED:
                state = IN_M
            j -= 1

    # The alignment spans from where the traceback stopped to the best cell.
    return str(best), s[i:end_i], t[j:end_j]
def alignment_score(s, t, scores, gap):
    '''
    Find a best-scoring local alignment of s and t with a linear gap penalty.

    Each gap position costs `gap`; substitution scores come from
    match_score(scores, a, b), which is defined outside this block.

    Returns a tuple (score, r, u): the best score as a string, and the
    substrings of s and t covered by that alignment. When several cells tie
    for the best score, the last one in row-major order is used.
    '''
    n_rows, n_cols = len(s) + 1, len(t) + 1

    # Move codes stored in the traceback table.
    STOP = 3
    # How far (rows, cols) to step back for move codes 0, 1 and 2:
    # diagonal, up (s[i-1] against a gap), left (t[j-1] against a gap).
    STEP_BACK = ((1, 1), (1, 0), (0, 1))

    table = [[0] * n_cols for _ in range(n_rows)]
    moves = [[STOP] * n_cols for _ in range(n_rows)]

    top_score = 0
    top_cell = (0, 0)

    for row in range(1, n_rows):
        for col in range(1, n_cols):
            candidates = [
                table[row - 1][col - 1]
                + match_score(scores, s[row - 1], t[col - 1]),
                table[row - 1][col] + gap,
                table[row][col - 1] + gap,
                0,  # start a fresh local alignment here
            ]
            value = max(candidates)
            table[row][col] = value
            moves[row][col] = candidates.index(value)
            if value >= top_score:
                top_score = value
                top_cell = (row, col)

    # Follow the recorded moves from the best cell until the alignment's
    # starting point (a STOP cell) or the edge of the table is reached.
    end_row, end_col = top_cell
    row, col = top_cell
    while row != 0 and col != 0 and moves[row][col] != STOP:
        d_row, d_col = STEP_BACK[moves[row][col]]
        row -= d_row
        col -= d_col

    # The aligned substrings run from the stopping point to the best cell.
    return str(top_score), s[row:end_row], t[col:end_col]
def global_align(s, t, scores, gap):
    '''
    Return the best global alignment score of s and t with a linear gap
    penalty.

    Each gap position costs `gap`; substitution scores come from
    match_score(scores, a, b), which is defined outside this block.

    NOTE(review): this name is defined more than once in this file; the
    last definition is the one bound at import time.
    '''
    # Row 0: aligning the empty prefix of s against t[:col] is all gaps.
    previous = [0] + [col * gap for col in range(1, len(t) + 1)]

    for row in range(1, len(s) + 1):
        # Column 0: aligning s[:row] against the empty prefix is all gaps.
        current = [row * gap]
        for col in range(1, len(t) + 1):
            substitution = (
                previous[col - 1]
                + match_score(scores, s[row - 1], t[col - 1])
            )
            gap_in_t = previous[col] + gap
            gap_in_s = current[col - 1] + gap
            current.append(max(substitution, gap_in_t, gap_in_s))
        previous = current

    # The last cell of the last row scores the complete alignment.
    return previous[-1]
def global_align(s, t, scores, gap):
    '''
    Return the best global alignment score of s and t with a linear gap
    penalty.

    Each gap position costs `gap`; substitution scores come from
    match_score(scores, a, b), which is defined outside this block.

    NOTE(review): earlier definitions of global_align in this file are
    replaced by this one at import time.
    '''
    height, width = len(s) + 1, len(t) + 1
    grid = [[0] * width for _ in range(height)]

    # First column and first row: a prefix aligned entirely against gaps.
    for r in range(1, height):
        grid[r][0] = r * gap
    for c in range(1, width):
        grid[0][c] = c * gap

    for r in range(1, height):
        above, here = grid[r - 1], grid[r]
        for c in range(1, width):
            paired = above[c - 1] + match_score(scores, s[r - 1], t[c - 1])
            here[c] = max(paired, above[c] + gap, here[c - 1] + gap)

    # Bottom-right cell holds the score for the two complete strings.
    return grid[-1][-1]
def global_align_with_affine(s, t, scores, gap, gap_e):
    '''
    Globally align s and t with affine gap penalties.

    A gap of length k costs gap + gap_e * (k - 1). Substitution scores come
    from match_score(scores, a, b), which is defined outside this block.

    Returns a tuple (score, s_align, t_align): the best score as a string
    and the two input strings with '-' inserted at gap positions. Empty
    inputs are supported; the other string is then aligned against gaps.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one bound at import time.
    '''
    n, m = len(s), len(t)

    # A true minus infinity marks unreachable gap states, so they can never
    # out-score a real alignment no matter how long the inputs are.
    neg_inf = float('-inf')

    # M: best score overall; X: last column is s[i-1] against a gap;
    # Y: last column is t[j-1] against a gap.
    M = [[0] * (m + 1) for _ in range(n + 1)]
    X = [[0] * (m + 1) for _ in range(n + 1)]
    Y = [[0] * (m + 1) for _ in range(n + 1)]

    # One traceback table per state, holding the index of the winning option.
    traceM = [[0] * (m + 1) for _ in range(n + 1)]
    traceX = [[0] * (m + 1) for _ in range(n + 1)]
    traceY = [[0] * (m + 1) for _ in range(n + 1)]

    # Edges: M holds the cost of one leading gap run; X and Y are unreachable.
    for i in range(1, n + 1):
        M[i][0] = gap + gap_e * (i - 1)
        X[i][0] = neg_inf
        Y[i][0] = neg_inf
    for j in range(1, m + 1):
        M[0][j] = gap + gap_e * (j - 1)
        X[0][j] = neg_inf
        Y[0][j] = neg_inf

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Index 0 = open a gap from M, index 1 = extend the existing gap.
            costX = [M[i - 1][j] + gap, X[i - 1][j] + gap_e]
            X[i][j] = max(costX)
            traceX[i][j] = costX.index(X[i][j])

            costY = [M[i][j - 1] + gap, Y[i][j - 1] + gap_e]
            Y[i][j] = max(costY)
            traceY[i][j] = costY.index(Y[i][j])

            # Index 0 = diagonal, 1 = take the X state, 2 = take the Y state.
            costM = [
                M[i - 1][j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                X[i][j],
                Y[i][j],
            ]
            M[i][j] = max(costM)
            traceM[i][j] = costM.index(M[i][j])

    # Pick the starting state from the bottom-right cell. The cell is
    # addressed with n and m explicitly so empty inputs work too.
    IN_X, IN_Y, IN_M = 0, 1, 2
    final = [X[n][m], Y[n][m], M[n][m]]
    max_score = max(final)
    state = final.index(max_score)

    # Collect alignment columns back-to-front, then reverse once at the end.
    s_cols, t_cols = [], []
    i, j = n, m
    while i > 0 and j > 0:
        if state == IN_X:
            if traceX[i][j] == 0:
                state = IN_M
            i -= 1
            s_cols.append(s[i])
            t_cols.append('-')
        elif state == IN_Y:
            if traceY[i][j] == 0:
                state = IN_M
            j -= 1
            s_cols.append('-')
            t_cols.append(t[j])
        else:
            if traceM[i][j] == 1:
                state = IN_X
            elif traceM[i][j] == 2:
                state = IN_Y
            else:
                i -= 1
                j -= 1
                s_cols.append(s[i])
                t_cols.append(t[j])

    # Whatever prefix is left (at most one of i, j is non-zero) is aligned
    # against leading gaps in the other string.
    s_align = '-' * j + s[:i] + ''.join(reversed(s_cols))
    t_align = '-' * i + t[:j] + ''.join(reversed(t_cols))

    return (str(max_score), s_align, t_align)
def global_align_with_affine(s, t, scores, gap, gap_e):
    '''
    Globally align s and t with affine gap penalties.

    A gap of length k costs gap + gap_e * (k - 1). Substitution scores come
    from match_score(scores, a, b), which is defined outside this block.

    Returns a tuple (score, s_align, t_align): the best score as a string
    and the two input strings with '-' inserted at gap positions. Empty
    inputs are supported; the other string is then aligned against gaps.

    NOTE(review): an earlier definition of this function in this file is
    replaced by this one at import time.
    '''
    n, m = len(s), len(t)

    # A true minus infinity marks unreachable gap states, so they can never
    # out-score a real alignment no matter how long the inputs are.
    neg_inf = float('-inf')

    # M: best score overall; X: last column is s[i-1] against a gap;
    # Y: last column is t[j-1] against a gap.
    M = [[0] * (m + 1) for _ in range(n + 1)]
    X = [[0] * (m + 1) for _ in range(n + 1)]
    Y = [[0] * (m + 1) for _ in range(n + 1)]

    # One traceback table per state, holding the index of the winning option.
    traceM = [[0] * (m + 1) for _ in range(n + 1)]
    traceX = [[0] * (m + 1) for _ in range(n + 1)]
    traceY = [[0] * (m + 1) for _ in range(n + 1)]

    # Edges: M holds the cost of one leading gap run; X and Y are unreachable.
    for i in range(1, n + 1):
        M[i][0] = gap + gap_e * (i - 1)
        X[i][0] = neg_inf
        Y[i][0] = neg_inf
    for j in range(1, m + 1):
        M[0][j] = gap + gap_e * (j - 1)
        X[0][j] = neg_inf
        Y[0][j] = neg_inf

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Index 0 = open a gap from M, index 1 = extend the existing gap.
            costX = [M[i - 1][j] + gap, X[i - 1][j] + gap_e]
            X[i][j] = max(costX)
            traceX[i][j] = costX.index(X[i][j])

            costY = [M[i][j - 1] + gap, Y[i][j - 1] + gap_e]
            Y[i][j] = max(costY)
            traceY[i][j] = costY.index(Y[i][j])

            # Index 0 = diagonal, 1 = take the X state, 2 = take the Y state.
            costM = [
                M[i - 1][j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                X[i][j],
                Y[i][j],
            ]
            M[i][j] = max(costM)
            traceM[i][j] = costM.index(M[i][j])

    # Pick the starting state from the bottom-right cell. The cell is
    # addressed with n and m explicitly so empty inputs work too.
    IN_X, IN_Y, IN_M = 0, 1, 2
    final = [X[n][m], Y[n][m], M[n][m]]
    max_score = max(final)
    state = final.index(max_score)

    # Collect alignment columns back-to-front, then reverse once at the end.
    s_cols, t_cols = [], []
    i, j = n, m
    while i > 0 and j > 0:
        if state == IN_X:
            if traceX[i][j] == 0:
                state = IN_M
            i -= 1
            s_cols.append(s[i])
            t_cols.append('-')
        elif state == IN_Y:
            if traceY[i][j] == 0:
                state = IN_M
            j -= 1
            s_cols.append('-')
            t_cols.append(t[j])
        else:
            if traceM[i][j] == 1:
                state = IN_X
            elif traceM[i][j] == 2:
                state = IN_Y
            else:
                i -= 1
                j -= 1
                s_cols.append(s[i])
                t_cols.append(t[j])

    # Whatever prefix is left (at most one of i, j is non-zero) is aligned
    # against leading gaps in the other string.
    s_align = '-' * j + s[:i] + ''.join(reversed(s_cols))
    t_align = '-' * i + t[:j] + ''.join(reversed(t_cols))

    return str(max_score), s_align, t_align