Ejemplo n.º 1
0
def global_align(seq1, seq2, Parameters=Parameters()):
    """Fill the three affine-gap score matrices for seq1 against seq2.

    M[i][j] holds the score of the best alignment of seq1[:i] and seq2[:j]
    that ends with seq1[i - 1] paired to seq2[j - 1]; Ix[i][j] the best one
    that ends with seq1[i - 1] opposite a gap; Iy[i][j] the best one that
    ends with seq2[j - 1] opposite a gap.

    Args:
        seq1: first sequence (indexes the first matrix dimension).
        seq2: second sequence (indexes the second matrix dimension).
        Parameters: scoring object providing ``score(a, b)``, ``gapopen``
            and ``gap``.

    Returns:
        The tuple ``(M, Ix, Iy)``.
    """
    M = create_matrix(len(seq1), len(seq2))
    Ix = create_matrix(len(seq1), len(seq2))
    Iy = create_matrix(len(seq1), len(seq2))

    # Border cells only. A gap run along the first row (Iy) or the first
    # column (Ix) scores 0; every other state is unreachable there. Ix and
    # Iy at [0][0] are not written and keep whatever create_matrix put there.
    for i in range(len(seq1) + 1):
        for j in range(len(seq2) + 1):
            if i != 0 and j != 0:
                continue  # interior cells are filled by the recurrence below
            if i == 0 and j == 0:
                M[i][j] = 0
            elif i == 0:
                Iy[i][j] = 0
                Ix[i][j] = -INF
                M[i][j] = -INF
            else:
                Ix[i][j] = 0
                Iy[i][j] = -INF
                M[i][j] = -INF

    # Affine-gap recurrence over the interior cells.
    for i, residue1 in enumerate(seq1, start=1):
        for j, residue2 in enumerate(seq2, start=1):
            pair_score = Parameters.score(residue1, residue2)

            via_m = M[i - 1][j - 1] + pair_score
            via_ix = Ix[i - 1][j - 1] + pair_score
            via_iy = Iy[i - 1][j - 1] + pair_score
            M[i][j] = max(via_ix, via_m, via_iy)

            # Either open a new gap from M or extend the running one.
            open_x = M[i - 1][j] + Parameters.gapopen + Parameters.gap
            extend_x = Ix[i - 1][j] + Parameters.gap
            Ix[i][j] = max(open_x, extend_x)

            open_y = M[i][j - 1] + Parameters.gapopen + Parameters.gap
            extend_y = Iy[i][j - 1] + Parameters.gap
            Iy[i][j] = max(open_y, extend_y)

    if DEBUG:
        for title, table in (("\nIx", Ix), ("\nIy", Iy), ("\nM", M)):
            print(title)
            print_matrix("_" + seq1, "_" + seq2, table)

    return M, Ix, Iy
Ejemplo n.º 2
0
def local_align(seq1, seq2, Parameters=Parameters()):
    """Fill a local-alignment score matrix with a linear gap penalty.

    Args:
        seq1: first sequence (indexes the first matrix dimension).
        seq2: second sequence (indexes the second matrix dimension).
        Parameters: scoring object providing ``score(a, b)`` and ``gap``.

    Returns:
        ``(M, li, lj)`` where ``M`` is the filled matrix and ``(li, lj)``
        is the position of the highest score; on ties the cell visited
        last wins. Border cells are never written here, so they keep the
        value create_matrix gave them.
    """
    M = create_matrix(len(seq1), len(seq2))

    best = 0
    li = 0
    lj = 0

    for i, residue1 in enumerate(seq1, start=1):
        for j, residue2 in enumerate(seq2, start=1):
            # Candidate scores, floored at 0 so an alignment may restart.
            from_left = M[i][j - 1] + Parameters.gap
            from_above = M[i - 1][j] + Parameters.gap
            from_diag = M[i - 1][j - 1] + Parameters.score(residue1, residue2)
            M[i][j] = max(from_left, from_above, from_diag, 0)

            # Remember where the best score so far lives.
            if M[i][j] >= best:
                best = M[i][j]
                li, lj = i, j

    return M, li, lj
Ejemplo n.º 3
0
def traceback_left(M, Ix, Iy, seq1, seq2, Parameters):
    """Recover one global alignment from the affine-gap matrices.

    The walk starts at cell (len(seq1), len(seq2)) in whichever matrix
    holds the highest score there (M wins ties, then Iy over Ix) and moves
    back to (0, 0). When several predecessors explain a cell in M, Ix is
    preferred, then M, then Iy; in a gap state, opening from M is preferred
    over extending the gap.

    Args:
        M, Ix, Iy: score matrices indexed as ``[i][j]`` with ``i`` over
            seq1 and ``j`` over seq2 (match, gap-in-seq2, gap-in-seq1).
        seq1: first sequence.
        seq2: second sequence.
        Parameters: scoring object providing ``score(a, b)``, ``gapopen``
            and ``gap``.

    Returns:
        ``(alignedseq1, alignedseq2)``: the two sequences padded with
        ``'-'`` so that they have equal length.
    """
    # Columns are collected back-to-front and reversed once at the end.
    column1 = []
    column2 = []

    i = len(seq1)
    j = len(seq2)

    # Pick the matrix in which the traceback starts.
    if M[i][j] >= Ix[i][j] and M[i][j] >= Iy[i][j]:
        matrix = "M"
    elif Ix[i][j] > Iy[i][j]:
        matrix = "Ix"
    else:
        matrix = "Iy"

    # Compare by value: identity tests ("is not 0") on ints are
    # implementation-dependent and warn on recent Python versions.
    while i != 0 or j != 0:
        # On a border only one move is possible; forcing it keeps the
        # indices from going negative (seq[-1] would silently wrap around).
        if i == 0:
            matrix = "Iy"
        elif j == 0:
            matrix = "Ix"

        if matrix == "M":
            column1.append(seq1[i - 1])
            column2.append(seq2[j - 1])

            score = Parameters.score(seq1[i - 1], seq2[j - 1])
            diag = M[i - 1][j - 1] + score
            esq = Ix[i - 1][j - 1] + score
            cim = Iy[i - 1][j - 1] + score

            # If nothing matches, the state is left unchanged.
            if M[i][j] == esq:
                matrix = "Ix"
            elif M[i][j] == diag:
                matrix = "M"
            elif M[i][j] == cim:
                matrix = "Iy"

            i -= 1
            j -= 1

        elif matrix == "Ix":
            # seq1 residue opposite a gap in seq2.
            column1.append(seq1[i - 1])
            column2.append("-")

            if Ix[i][j] == M[i - 1][j] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Ix[i][j] == Ix[i - 1][j] + Parameters.gap:
                matrix = "Ix"

            i -= 1

        else:
            # "Iy": seq2 residue opposite a gap in seq1.
            column1.append("-")
            column2.append(seq2[j - 1])

            if Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Iy[i][j] == Iy[i][j - 1] + Parameters.gap:
                matrix = "Iy"

            j -= 1

    alignedseq1 = "".join(reversed(column1))
    alignedseq2 = "".join(reversed(column2))

    return alignedseq1, alignedseq2
Ejemplo n.º 4
0
            elif Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap:
                matrix = "M"

            j = j - 1

    #Revertendo a String
    alignedseq1 = alignedseq1[::-1]
    alignedseq2 = alignedseq2[::-1]

    return alignedseq1, alignedseq2


# Script entry point: globally align two FASTA sequences with affine gap
# penalties and summarise the resulting alignment.
if __name__ == '__main__':
    # Scoring parameters
    par = Parameters(gapopen=-10, gap=-0.5, matrix='BLOSUM62', stype='protein')

    # Input sequences
    # NOTE(review): `io` must be a project module here (the standard-library
    # `io` has no read_fasta/read_file) - confirm the import.
    seq1 = io.read_fasta(io.read_file("../inputs/default1.fasta"))
    seq2 = io.read_fasta(io.read_file("../inputs/default2.fasta"))

    # Score matrices (match state and the two gap states)
    M, Ix, Iy = global_align(seq1, seq2, par)

    # Aligned sequences, recovered by two traceback variants
    leftalignedseq1, leftalignedseq2 = traceback_left(M, Ix, Iy, seq1, seq2,
                                                      par)
    # NOTE(review): the "up" alignment is not used in the visible part of
    # this script - presumably consumed further down; verify.
    upalignedseq1, upalignedseq2 = traceback_up(M, Ix, Iy, seq1, seq2, par)

    result = Alignment(leftalignedseq1, leftalignedseq2, "LEFT")
    result.calculate_mat_mis_gaps()
Ejemplo n.º 5
0
def traceback_left(M, Ix, Iy, seq1, seq2, Parameters, maxmatrix, li, lj):
    """Recover one local alignment from the affine-gap matrices.

    The walk starts at cell (li, lj) of the matrix named by ``maxmatrix``
    and moves backwards until it meets a cell whose score is 0 or reaches
    the first row or column. The terminating cell contributes no column:
    the stop test runs before anything is emitted, so neither the
    zero-score cell nor a wrapped-around ``seq[-1]`` ends up in the output.

    When several predecessors explain a cell in M, Ix is preferred, then M,
    then Iy. In Ix, extending the gap is preferred over opening it; in Iy,
    opening is preferred over extending.

    Args:
        M, Ix, Iy: score matrices indexed as ``[i][j]`` with ``i`` over
            seq1 and ``j`` over seq2 (match, gap-in-seq2, gap-in-seq1).
        seq1: first sequence.
        seq2: second sequence.
        Parameters: scoring object providing ``score(a, b)``, ``gapopen``
            and ``gap``.
        maxmatrix: ``"M"``, ``"Ix"`` or ``"Iy"`` - the matrix holding the
            best score.
        li: first index of the best-scoring cell.
        lj: second index of the best-scoring cell.

    Returns:
        ``(alignedseq1, alignedseq2)``: the aligned segments, padded with
        ``'-'`` so that they have equal length.

    Raises:
        ValueError: if ``maxmatrix`` is not one of the three matrix names
            (previously this made the loop spin forever).
    """
    if maxmatrix not in ("M", "Ix", "Iy"):
        raise ValueError("maxmatrix must be 'M', 'Ix' or 'Iy', got %r"
                         % (maxmatrix,))

    # Columns are collected back-to-front and reversed once at the end.
    column1 = []
    column2 = []

    i = li
    j = lj
    matrix = maxmatrix

    # A local alignment can start at a border at the latest; stopping there
    # also keeps every index below non-negative.
    while i > 0 and j > 0:
        if matrix == "M":
            if M[i][j] == 0:
                break

            column1.append(seq1[i - 1])
            column2.append(seq2[j - 1])

            score = Parameters.score(seq1[i - 1], seq2[j - 1])
            diag = M[i - 1][j - 1] + score
            esq = Ix[i - 1][j - 1] + score
            cim = Iy[i - 1][j - 1] + score

            # If nothing matches, the state is left unchanged.
            if M[i][j] == esq:
                matrix = "Ix"
            elif M[i][j] == diag:
                matrix = "M"
            elif M[i][j] == cim:
                matrix = "Iy"

            i -= 1
            j -= 1

        elif matrix == "Ix":
            if Ix[i][j] == 0:
                break

            # seq1 residue opposite a gap in seq2.
            column1.append(seq1[i - 1])
            column2.append("-")

            if Ix[i][j] == Ix[i - 1][j] + Parameters.gap:
                matrix = "Ix"
            elif Ix[i][j] == M[i - 1][j] + Parameters.gapopen + Parameters.gap:
                matrix = "M"

            i -= 1

        else:
            # "Iy": seq2 residue opposite a gap in seq1.
            if Iy[i][j] == 0:
                break

            column1.append("-")
            column2.append(seq2[j - 1])

            if Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Iy[i][j] == Iy[i][j - 1] + Parameters.gap:
                matrix = "Iy"

            j -= 1

    alignedseq1 = "".join(reversed(column1))
    alignedseq2 = "".join(reversed(column2))

    return alignedseq1, alignedseq2
Ejemplo n.º 6
0
def local_align(seq1, seq2, Parameters=Parameters()):
    """Fill the affine-gap matrices for a local alignment of seq1 and seq2.

    M[i][j] scores alignments ending with seq1[i - 1] paired to
    seq2[j - 1] and is floored at 0; Ix[i][j] scores those ending with
    seq1[i - 1] opposite a gap; Iy[i][j] those ending with seq2[j - 1]
    opposite a gap. While filling, the highest score seen in any of the
    three matrices is tracked.

    Args:
        seq1: first sequence (indexes the first matrix dimension).
        seq2: second sequence (indexes the second matrix dimension).
        Parameters: scoring object providing ``score(a, b)``, ``gapopen``
            and ``gap``.

    Returns:
        ``(M, Ix, Iy, maxmatrix, li, lj)`` where ``maxmatrix`` names the
        matrix (``"M"``, ``"Ix"`` or ``"Iy"``) holding the best score and
        ``(li, lj)`` is its position. Ties go to the candidate examined
        last: later cells beat earlier ones and, within a cell, Iy beats
        Ix beats M.
    """
    M = create_matrix(len(seq1), len(seq2))
    Ix = create_matrix(len(seq1), len(seq2))
    Iy = create_matrix(len(seq1), len(seq2))

    # Border cells only: a gap run of length k along the first row (Iy) or
    # first column (Ix) scores gapopen + k * gap; other states are
    # unreachable there.
    for i in range(len(seq1) + 1):
        for j in range(len(seq2) + 1):
            if i != 0 and j != 0:
                continue  # interior cells are filled by the recurrence below
            if i == 0:
                Iy[i][j] = Parameters.gapopen + (Parameters.gap * j)
                if j == 0:
                    Ix[i][j] = Parameters.gapopen + (Parameters.gap * i)
                    M[i][j] = 0
                else:
                    Ix[i][j] = -INF
                    M[i][j] = -INF
            else:
                Ix[i][j] = Parameters.gapopen + (Parameters.gap * i)
                Iy[i][j] = -INF
                M[i][j] = -INF

    # Running maximum over all three matrices.
    maxmatrix = "M"
    max_score = 0
    li = 0
    lj = 0

    # Affine-gap recurrence over the interior cells.
    for i, residue1 in enumerate(seq1, start=1):
        for j, residue2 in enumerate(seq2, start=1):
            pair_score = Parameters.score(residue1, residue2)

            via_m = M[i - 1][j - 1] + pair_score
            via_ix = Ix[i - 1][j - 1] + pair_score
            via_iy = Iy[i - 1][j - 1] + pair_score
            M[i][j] = max(via_ix, via_m, via_iy, 0)

            # Either open a new gap from M or extend the running one.
            open_x = M[i - 1][j] + Parameters.gapopen + Parameters.gap
            extend_x = Ix[i - 1][j] + Parameters.gap
            Ix[i][j] = max(open_x, extend_x)

            open_y = M[i][j - 1] + Parameters.gapopen + Parameters.gap
            extend_y = Iy[i][j - 1] + Parameters.gap
            Iy[i][j] = max(open_y, extend_y)

            # Examine the three fresh cells in the order M, Ix, Iy.
            for label, table in (("M", M), ("Ix", Ix), ("Iy", Iy)):
                if table[i][j] >= max_score:
                    max_score = table[i][j]
                    li, lj = i, j
                    maxmatrix = label

    if DEBUG:
        for title, table in (("\nIx", Ix), ("\nIy", Iy), ("\nM", M)):
            print(title)
            print_matrix("_" + seq1, "_" + seq2, table)

    return M, Ix, Iy, maxmatrix, li, lj
Ejemplo n.º 7
0
def traceback(M, seq1, seq2, i, j, Parameters):
    """Recover one local alignment from a linear-gap score matrix.

    ``i`` indexes seq1 (first dimension of ``M``) and ``j`` indexes seq2
    (second dimension). Starting at ``M[i][j]``, the walk repeats the
    steps below until it reaches a cell whose score is 0:

      1. On the first column (``j == 0``) only seq1 can be consumed; on
         the first row (``i == 0``) only seq2 can. These cases are decided
         before any neighbour is read, so ``M[i - 1]`` / ``M[..][j - 1]``
         never wrap around to the far side of the matrix.
      2. Otherwise the move that explains the current score is taken,
         trying in this order:
           a) ``M[i - 1][j] + gap``: seq1[i - 1] opposite ``'-'``;
           b) ``M[i - 1][j - 1] + score(seq1[i - 1], seq2[j - 1])``: the
              two residues are paired;
           c) ``M[i][j - 1] + gap``: ``'-'`` opposite seq2[j - 1].
      3. The collected columns are reversed at the end.

    Args:
        M: score matrix indexed as ``M[i][j]``.
        seq1: first sequence.
        seq2: second sequence.
        i: first index of the starting cell.
        j: second index of the starting cell.
        Parameters: scoring object providing ``score(a, b)`` and ``gap``.

    Returns:
        ``(alignedseq1, alignedseq2)``: the aligned segments, padded with
        ``'-'`` so that they have equal length.

    Raises:
        ValueError: if no move explains the score of a cell, i.e. the
            matrix does not match the sequences/parameters (previously
            this made the loop spin forever).
    """
    # Columns are collected back-to-front and reversed once at the end.
    column1 = []
    column2 = []

    while M[i][j] != 0:
        current = M[i][j]

        if i == 0 and j == 0:
            # Nothing left to consume.
            break

        # First column: only seq1 residues remain.
        if j == 0:
            column1.append(seq1[i - 1])
            column2.append("-")
            i -= 1
            continue

        # First row: only seq2 residues remain.
        if i == 0:
            column1.append("-")
            column2.append(seq2[j - 1])
            j -= 1
            continue

        # Gap in seq2 (move along the first index).
        if current == M[i - 1][j] + Parameters.gap:
            column1.append(seq1[i - 1])
            column2.append("-")
            i -= 1
            continue

        # Diagonal: the two residues are paired.
        if current == M[i - 1][j - 1] + Parameters.score(
                seq1[i - 1], seq2[j - 1]):
            column1.append(seq1[i - 1])
            column2.append(seq2[j - 1])
            i -= 1
            j -= 1
            continue

        # Gap in seq1 (move along the second index).
        if current == M[i][j - 1] + Parameters.gap:
            column1.append("-")
            column2.append(seq2[j - 1])
            j -= 1
            continue

        raise ValueError(
            "no traceback move explains M[%d][%d]" % (i, j))

    alignedseq1 = "".join(reversed(column1))
    alignedseq2 = "".join(reversed(column2))

    return alignedseq1, alignedseq2
Ejemplo n.º 8
0
        #Cima
        if M[i][j] == M[i][j - 1] + Parameters.gap:
            #if M[i][j-1] is not 0:
            alignedseq1 = alignedseq1 + '-'
            alignedseq2 = alignedseq2 + seq2[j - 1]
            j = j - 1
            continue

    #Revertendo a String
    alignedseq1 = alignedseq1[::-1]
    alignedseq2 = alignedseq2[::-1]

    return alignedseq1, alignedseq2


# Script entry point: locally align two FASTA sequences with a linear gap
# penalty and write the summarised alignment to a text file.
if __name__ == '__main__':

    # Scoring parameters
    # NOTE(review): BLOSUM62 is an amino-acid substitution matrix, yet
    # stype is 'dna' - confirm this combination is intended.
    par = Parameters(gap=-10, matrix='BLOSUM62', stype='dna')

    # NOTE(review): `io` must be a project module here (the standard-library
    # `io` has no read_fasta/read_file/write_file) - confirm the import.
    seq1 = io.read_fasta(io.read_file("../inputs/default1.fasta"))
    seq2 = io.read_fasta(io.read_file("../inputs/default2.fasta"))

    # Score matrix plus the position (i, j) of its best-scoring cell
    matrix, i, j = local_align(seq1, seq2, par)

    # Walk back from the best cell to recover the aligned segments
    alignedseq1, alignedseq2 = traceback(matrix, seq1, seq2, i, j, par)

    result = Alignment(alignedseq1, alignedseq2, "DEFAULT")
    result.calculate_mat_mis_gaps()

    io.write_file("../outputs/locally_local_linear_output.txt", str(result))