예제 #1
0
def global_align(s, t, matrix, gap):
    """Return the best global alignment score of s and t.

    A gap costs ``gap`` once, no matter how long it is: opening a gap adds
    ``gap`` and continuing it adds nothing.  Aligned character pairs are
    scored with ``match_score(matrix, a, b)``.
    """
    n_rows, n_cols = len(s) + 1, len(t) + 1
    floor = -9999  # stand-in for "no alignment can end this way"

    # best[i][j]: best score for s[:i] against t[:j].
    best = [[0] * n_cols for _ in range(n_rows)]
    # gap_down[i][j]: best score that reaches (i, j) by stepping from row i-1
    # within column j, i.e. s[i-1] is placed against a gap.
    gap_down = [[floor] * n_cols for _ in range(n_rows)]
    # gap_across[i][j]: best score that reaches (i, j) by stepping from column
    # j-1 within row i, i.e. t[j-1] is placed against a gap.
    gap_across = [[floor] * n_cols for _ in range(n_rows)]

    # Aligning a non-empty prefix against nothing is a single gap.
    for row in best[1:]:
        row[0] = gap
    best[0][1:] = [gap] * len(t)

    for i in range(1, n_rows):
        above, here = best[i - 1], best[i]
        for j in range(1, n_cols):
            # Either open a new gap from the best score so far, or keep an
            # already open gap going for free.
            down = max(above[j] + gap, gap_down[i - 1][j])
            across = max(here[j - 1] + gap, gap_across[i][j - 1])
            paired = above[j - 1] + match_score(matrix, s[i - 1], t[j - 1])

            gap_down[i][j] = down
            gap_across[i][j] = across
            here[j] = max(paired, down, across)

    # The bottom-right cell covers both complete strings.
    return best[-1][-1]
예제 #2
0
def global_align(s, t, matrix, gap):
    """Score the optimal global alignment of s and t with a constant gap cost.

    Each gap contributes ``gap`` exactly once regardless of its length, and
    each aligned pair contributes ``match_score(matrix, a, b)``.  Only the
    final score is needed, so the table is kept one row at a time.
    """
    width = len(t) + 1
    sentinel = -9999  # marks "ending in this kind of gap is impossible here"

    # Row 0: the empty prefix of s against every prefix of t.
    m_prev = [0] + [gap] * len(t)
    x_prev = [sentinel] * width

    for a in s:
        m_cur = [gap]          # a non-empty prefix of s against nothing
        x_cur = [sentinel]
        y_left = sentinel      # gap-in-s score of the cell to the left
        for j in range(1, width):
            # Open a gap from the best score, or continue one at no cost.
            x_val = max(m_prev[j] + gap, x_prev[j])
            y_left = max(m_cur[j - 1] + gap, y_left)
            diagonal = m_prev[j - 1] + match_score(matrix, a, t[j - 1])
            m_cur.append(max(diagonal, x_val, y_left))
            x_cur.append(x_val)
        m_prev, x_prev = m_cur, x_cur

    # Last cell of the last row corresponds to the two full strings.
    return m_prev[-1]
예제 #3
0
def local_align_with_affine(s, t, scores, gap, gap_e):
    """Find a best-scoring local alignment of s and t under affine gaps.

    Args:
        s, t: the two strings to compare.
        scores: substitution table, passed unchanged to ``match_score``.
        gap: score added for the first position of a gap.
        gap_e: score added for every further position of that gap.

    Returns:
        ``(score, r, u)`` where ``score`` is the best local score as a
        string and ``r`` / ``u`` are the substrings of ``s`` / ``t`` that the
        best alignment spans.  When no alignment scores above zero (including
        empty input) the result is ``("0", "", "")``.

    Only the previous row of scores is kept; the traceback table is the only
    full-size structure.  The traceback assumes ``gap`` and ``gap_e`` are
    negative, as is usual for penalties.
    """
    n_cols = len(t) + 1

    # Layout of one traceback cell:
    #   bits 0-1  how M[i][j] was obtained:
    #             0 = diagonal, 1 = from X[i][j], 2 = from Y[i][j], 3 = restart
    #   bit 2     X[i][j] extends X[i-1][j] (clear: it opens from M[i-1][j])
    #   bit 3     Y[i][j] extends Y[i][j-1] (clear: it opens from M[i][j-1])
    # Keeping the open/extend choice is what lets the traceback stay inside a
    # gap until the cell where that gap was actually opened.
    x_extends, y_extends = 4, 8
    traceback = [[0] * n_cols for _ in range(len(s) + 1)]

    # Scores of the previous row (the Y scores only depend on the current row).
    prev_x = [0] * n_cols
    prev_m = [0] * n_cols

    # The empty alignment scores 0, so nothing below that can be "best".
    best = 0
    best_pos = (0, 0)

    for i in range(1, len(s) + 1):
        cur_x = [0] * n_cols
        cur_y = [0] * n_cols
        cur_m = [0] * n_cols
        trace_row = traceback[i]

        for j in range(1, n_cols):
            flags = 0

            # Gap in t: s[i-1] is aligned against nothing.
            open_x = prev_m[j] + gap
            extend_x = prev_x[j] + gap_e
            if extend_x > open_x:
                cur_x[j] = extend_x
                flags |= x_extends
            else:
                cur_x[j] = open_x

            # Gap in s: t[j-1] is aligned against nothing.
            open_y = cur_m[j - 1] + gap
            extend_y = cur_y[j - 1] + gap_e
            if extend_y > open_y:
                cur_y[j] = extend_y
                flags |= y_extends
            else:
                cur_y[j] = open_y

            candidates = [
                prev_m[j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                cur_x[j],
                cur_y[j],
                0,
            ]
            cur_m[j] = max(candidates)
            trace_row[j] = candidates.index(cur_m[j]) | flags

            if cur_m[j] > best:
                best = cur_m[j]
                best_pos = (i, j)

        prev_x = cur_x
        prev_m = cur_m

    # Walk back from the best cell to where the alignment starts.
    end_i, end_j = best_pos
    i, j = end_i, end_j
    in_m, in_x, in_y = 0, 1, 2
    state = in_m
    while i > 0 and j > 0:
        cell = traceback[i][j]
        if state == in_x:
            # Leave the gap only at the cell where it was opened.
            if not cell & x_extends:
                state = in_m
            i -= 1
        elif state == in_y:
            if not cell & y_extends:
                state = in_m
            j -= 1
        else:
            move = cell & 3
            if move == 3:
                break  # the score was reset to zero: alignment starts here
            if move == 0:
                i -= 1
                j -= 1
            else:
                state = move  # 1 -> in_x, 2 -> in_y; position is unchanged

    return str(best), s[i:end_i], t[j:end_j]
예제 #4
0
def alignment_score(s, t, scores, gap):
    """Find a best local alignment of s and t with a linear gap penalty.

    Returns a tuple ``(score, r, u)``: the best local score converted to a
    string, and the substrings of ``s`` and ``t`` covered by that alignment.
    Pairs are scored with ``match_score(scores, a, b)`` and every gap
    position adds ``gap``.
    """
    n_rows, n_cols = len(s) + 1, len(t) + 1

    # table[i][j]: best score of a local alignment ending at s[i-1], t[j-1].
    table = [[0] * n_cols for _ in range(n_rows)]
    # move[i][j]: 0 diagonal, 1 up, 2 left, 3 stop (score restarted at zero).
    move = [[3] * n_cols for _ in range(n_rows)]

    top = 0
    top_i = top_j = 0

    for i in range(1, n_rows):
        for j in range(1, n_cols):
            options = (
                table[i - 1][j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                table[i - 1][j] + gap,
                table[i][j - 1] + gap,
                0,
            )
            value = max(options)
            table[i][j] = value
            move[i][j] = options.index(value)

            # ">=" means the last cell holding the maximum wins.
            if value >= top:
                top, top_i, top_j = value, i, j

    # Follow the recorded moves from the best cell until the alignment
    # restarts or an edge of the table is reached.
    i, j = top_i, top_j
    while i and j:
        step = move[i][j]
        if step == 3:
            break
        if step != 2:   # diagonal or up consumes a character of s
            i -= 1
        if step != 1:   # diagonal or left consumes a character of t
            j -= 1

    # The aligned region runs from where the walk stopped to the best cell.
    return str(top), s[i:top_i], t[j:top_j]
예제 #5
0
def global_align(s, t, scores, gap):
    """Return the optimal global alignment score of s and t.

    Every gap position adds ``gap`` (linear penalty); every aligned pair adds
    ``match_score(scores, a, b)``.
    """
    n_rows, n_cols = len(s) + 1, len(t) + 1
    table = [[0] * n_cols for _ in range(n_rows)]

    # Top border: the empty prefix of s against the first j characters of t.
    for j in range(1, n_cols):
        table[0][j] = j * gap

    for i in range(1, n_rows):
        above, row = table[i - 1], table[i]
        # Left border: the first i characters of s against nothing.
        row[0] = i * gap
        for j in range(1, n_cols):
            paired = above[j - 1] + match_score(scores, s[i - 1], t[j - 1])
            skip_s = above[j] + gap
            skip_t = row[j - 1] + gap
            row[j] = max(paired, skip_s, skip_t)

    # The last cell accounts for both complete strings.
    return table[-1][-1]
예제 #6
0
def global_align(s, t, scores, gap):
    """Compute the best global alignment score of s and t (linear gaps).

    Aligned pairs are scored by ``match_score(scores, a, b)`` and each gap
    position adds ``gap``.  Only the score is returned, so the similarity
    table is kept one row at a time.
    """
    width = len(t) + 1

    # Row 0: j leading characters of t aligned against gaps.
    previous = [0] + [j * gap for j in range(1, width)]

    for i in range(1, len(s) + 1):
        # Column 0: i leading characters of s aligned against gaps.
        current = [i * gap]
        for j in range(1, width):
            current.append(max(
                previous[j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                previous[j] + gap,
                current[j - 1] + gap,
            ))
        previous = current

    # Final cell of the final row is the score for the whole strings.
    return previous[-1]
예제 #7
0
def global_align_with_affine(s, t, scores, gap, gap_e):
    """Globally align s and t using affine gap penalties.

    Args:
        s, t: the two strings to align.
        scores: substitution table, passed unchanged to ``match_score``.
        gap: score added for the first position of a gap.
        gap_e: score added for every further position of that gap.

    Returns:
        ``(score, s_align, t_align)``: the optimal score as a string, and
        both inputs padded with '-' so that they have the same length.
        Empty inputs are accepted; the other string is then aligned
        entirely against gaps.
    """
    n_rows, n_cols = len(s) + 1, len(t) + 1

    # A true negative infinity can never beat a real score, however long the
    # sequences are (a finite sentinel such as -9999 eventually does).
    unreachable = float('-inf')

    # M[i][j]: best score for s[:i] vs t[:j] (it also takes X and Y into
    # account, so it is the overall best for that cell).
    # X[i][j]: best score ending with s[i-1] against a gap.
    # Y[i][j]: best score ending with t[j-1] against a gap.
    M = [[0] * n_cols for _ in range(n_rows)]
    X = [[unreachable] * n_cols for _ in range(n_rows)]
    Y = [[unreachable] * n_cols for _ in range(n_rows)]

    # Which candidate produced each cell (index into the cost lists below).
    traceM = [[0] * n_cols for _ in range(n_rows)]
    traceX = [[0] * n_cols for _ in range(n_rows)]
    traceY = [[0] * n_cols for _ in range(n_rows)]

    # Borders: a prefix aligned against nothing is one gap of that length.
    for i in range(1, n_rows):
        M[i][0] = gap + gap_e * (i - 1)
    for j in range(1, n_cols):
        M[0][j] = gap + gap_e * (j - 1)

    for i in range(1, n_rows):
        for j in range(1, n_cols):
            costX = [M[i - 1][j] + gap, X[i - 1][j] + gap_e]  # open, extend
            X[i][j] = max(costX)
            traceX[i][j] = costX.index(X[i][j])

            costY = [M[i][j - 1] + gap, Y[i][j - 1] + gap_e]  # open, extend
            Y[i][j] = max(costY)
            traceY[i][j] = costY.index(Y[i][j])

            costM = [
                M[i - 1][j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                X[i][j],
                Y[i][j],
            ]
            M[i][j] = max(costM)
            traceM[i][j] = costM.index(M[i][j])

    # Choose the matrix to start the traceback in.  The bottom-right cells
    # are addressed explicitly so this also works when a loop above never ran.
    final_scores = [X[-1][-1], Y[-1][-1], M[-1][-1]]
    max_score = max(final_scores)
    state = final_scores.index(max_score)  # 0: in X, 1: in Y, 2: in M

    # Collect the alignment columns back to front.
    i, j = len(s), len(t)
    s_cols, t_cols = [], []
    while i > 0 and j > 0:
        if state == 0:
            # Leave X only at the cell where the gap was opened.
            if traceX[i][j] == 0:
                state = 2
            s_cols.append(s[i - 1])
            t_cols.append('-')
            i -= 1
        elif state == 1:
            if traceY[i][j] == 0:
                state = 2
            s_cols.append('-')
            t_cols.append(t[j - 1])
            j -= 1
        else:
            if traceM[i][j] == 1:
                state = 0
            elif traceM[i][j] == 2:
                state = 1
            else:
                s_cols.append(s[i - 1])
                t_cols.append(t[j - 1])
                i -= 1
                j -= 1

    # Whatever is left of one string is aligned against leading gaps.
    while i > 0:
        s_cols.append(s[i - 1])
        t_cols.append('-')
        i -= 1
    while j > 0:
        s_cols.append('-')
        t_cols.append(t[j - 1])
        j -= 1

    s_align = ''.join(reversed(s_cols))
    t_align = ''.join(reversed(t_cols))
    return (str(max_score), s_align, t_align)
예제 #8
0
def global_align_with_affine(s, t, scores, gap, gap_e):
    """Compute an optimal global alignment of s and t with affine gaps.

    Args:
        s, t: the two strings to align.
        scores: substitution table, passed unchanged to ``match_score``.
        gap: score added when a gap is opened (covers its first position).
        gap_e: score added for each additional position of an open gap.

    Returns:
        A tuple ``(score, s_align, t_align)`` holding the optimal score as a
        string and the two inputs with '-' inserted so their lengths match.
        If either input is empty, the other is aligned against gaps only.
    """
    rows, cols = len(s) + 1, len(t) + 1

    # Negative infinity marks states that cannot occur; unlike a finite
    # placeholder it stays below every real score for any input length.
    neg_inf = float('-inf')

    # M: overall best score per cell (max of diagonal, X and Y).
    # X: best score ending with a character of s against a gap.
    # Y: best score ending with a character of t against a gap.
    M = [[0] * cols for _ in range(rows)]
    X = [[neg_inf] * cols for _ in range(rows)]
    Y = [[neg_inf] * cols for _ in range(rows)]

    # Traceback tables: index of the winning candidate for each cell.
    traceM = [[0] * cols for _ in range(rows)]
    traceX = [[0] * cols for _ in range(rows)]
    traceY = [[0] * cols for _ in range(rows)]

    # First column / first row: one gap spanning the whole prefix.
    for i in range(1, rows):
        M[i][0] = gap + gap_e * (i - 1)
    for j in range(1, cols):
        M[0][j] = gap + gap_e * (j - 1)

    # Fill in the matrices.
    for i in range(1, rows):
        for j in range(1, cols):
            costX = [M[i - 1][j] + gap,       # open a gap
                     X[i - 1][j] + gap_e]     # extend the current gap
            X[i][j] = max(costX)
            traceX[i][j] = costX.index(X[i][j])

            costY = [M[i][j - 1] + gap,
                     Y[i][j - 1] + gap_e]
            Y[i][j] = max(costY)
            traceY[i][j] = costY.index(Y[i][j])

            costM = [M[i - 1][j - 1] + match_score(scores, s[i - 1], t[j - 1]),
                     X[i][j],
                     Y[i][j]]
            M[i][j] = max(costM)
            traceM[i][j] = costM.index(M[i][j])

    # Start the traceback in whichever matrix holds the best final score.
    # Index the last cell directly instead of relying on loop variables,
    # which are unset when s or t is empty.
    corner = [X[-1][-1], Y[-1][-1], M[-1][-1]]
    max_score = max(corner)
    in_x, in_y, in_m = 0, 1, 2
    state = corner.index(max_score)

    # Build both aligned strings from the end towards the start.
    i, j = len(s), len(t)
    s_rev, t_rev = [], []
    while i > 0 and j > 0:
        if state == in_x:
            # traceX == 0 means the gap was opened here: return to M next.
            if traceX[i][j] == 0:
                state = in_m
            s_rev.append(s[i - 1])
            t_rev.append('-')
            i -= 1
        elif state == in_y:
            if traceY[i][j] == 0:
                state = in_m
            s_rev.append('-')
            t_rev.append(t[j - 1])
            j -= 1
        elif traceM[i][j] == 1:
            state = in_x
        elif traceM[i][j] == 2:
            state = in_y
        else:
            s_rev.append(s[i - 1])
            t_rev.append(t[j - 1])
            i -= 1
            j -= 1

    # Fill in any leading gaps once one of the strings is used up.
    for k in range(i, 0, -1):
        s_rev.append(s[k - 1])
        t_rev.append('-')
    for k in range(j, 0, -1):
        s_rev.append('-')
        t_rev.append(t[k - 1])

    s_align = ''.join(reversed(s_rev))
    t_align = ''.join(reversed(t_rev))
    return str(max_score), s_align, t_align