Python needleman_wunsch Exemples, dtaidistance.alignment.needleman_wunsch Python Exemples

Exemple #1

0

Afficher le fichier

def test_sequences3():
    """Align 'GAAAAAAAT' with 'GAATA' and verify both gapped sequences."""
    first, second = "GAAAAAAAT", "GAATA"
    _score, table = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        table, first, second, gap='-')
    # Expected gapped versions of the first and second sequence.
    expected_first, expected_second = list('GAA-AAAAAT'), list('GAATA-----')
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #2

0

Afficher le fichier

def test_sequences3():
    """Align 'GAAAAAAAT' with 'GAATA'; verify the score and both gapped sequences."""
    first, second = "GAAAAAAAT", "GAATA"
    score, _scores, trace = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        trace, first, second, gap='-')
    # Expected gapped versions of the first and second sequence.
    expected_first, expected_second = list('GAAAAAAAT'), list('G---AATA-')
    assert score == -1
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #3

0

Afficher le fichier

def route_mismatch_factor(map_con,
                          path_pred,
                          path_grnd,
                          window=None,
                          dist_fn=None,
                          keep_mismatches=False):
    # type: (BaseMap, List[int], List[int], Optional[int], Optional[Callable], bool) -> Tuple[float, float, float, float, List[Tuple[int, int]], float, float]
    r"""Evaluation method from Newson and Krumm (2009).

    :math:`f = \frac{d_{-} + d_{+}}{d_0}`

    With :math:`d_{-}` the length that is erroneously subtracted,
    :math:`d_{+}` the length that is erroneously added, and :math:`d_0` the
    distance of the correct route.

    This function only supports connected states (thus not switching between
    states that are not connected, e.g. parallel roads).

    Also computes the Accuracy by Number (AN) and Accuracy by Length (AL)
    metrics from Zheng et al. (2009).

    :param map_con: Map object; only its ``path_dist`` method is called here,
        once per aligned path element, to obtain that element's length.
    :param path_pred: Predicted path, indexed by the alignment.
    :param path_grnd: Ground-truth path, indexed by the alignment.
    :param window: Forwarded unchanged to ``needleman_wunsch``.
    :param dist_fn: Distance function; defaults to ``dist_latlon.distance``.
        It is resolved but not otherwise used by this function.
    :param keep_mismatches: When true, collect every mismatching
        ``(predicted, ground truth)`` pair; otherwise ``mismatches`` is None.
    :return: Tuple ``(factor, cnt_matches, cnt_mismatches, d_zero,
        mismatches, an, al)``.
    :raises ZeroDivisionError: If the accumulated ground-truth length
        ``d_zero`` is zero or ``path_grnd`` is empty.
    """
    if dist_fn is None:
        dist_fn = dist_latlon.distance
    _, matrix = needleman_wunsch(path_pred, path_grnd, window=window)
    algn, _, _ = best_alignment(matrix)
    d_plus = 0  # length erroneously added
    d_min = 0  # length erroneously subtracted
    d_zero = 0  # length of correct route
    cnt_matches = 0  # number of perfect matches
    cnt_mismatches = 0
    mismatches = [] if keep_mismatches else None

    for pred_pi, grnd_pi in algn:
        pred_p = path_pred[pred_pi]
        grnd_p = path_grnd[grnd_pi]
        grnd_d = map_con.path_dist(grnd_p)
        # Every aligned ground-truth element counts towards the route length.
        d_zero += grnd_d
        if pred_p == grnd_p:
            cnt_matches += 1
            continue
        # Mismatch: the predicted element was wrongly added and the
        # ground-truth element was wrongly left out.
        cnt_mismatches += 1
        d_plus += map_con.path_dist(pred_p)
        d_min += grnd_d
        if mismatches is not None:
            mismatches.append((pred_p, grnd_p))

    factor = (d_min + d_plus) / d_zero
    an = cnt_matches / len(path_grnd)
    al = (d_zero - d_min) / d_zero
    return factor, cnt_matches, cnt_mismatches, d_zero, mismatches, an, al

Exemple #4

0

Afficher le fichier

def test_sequences_blosum():
    """Align two sequences with a substitution function built from a file.

    The substitution scores are read from ``rsrc/substitution.txt`` next to
    this test file and passed to ``needleman_wunsch``.
    """
    matrix_file = Path(__file__).parent / "rsrc" / "substitution.txt"
    substitution_fn = alignment.make_substitution_fn(
        read_substitution_matrix(matrix_file))
    first, second = "AGACTAGTTAC", "CGAGACGT"
    _score, _scores, trace = alignment.needleman_wunsch(
        first, second, substitution=substitution_fn)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        trace, first, second, gap='-')
    expected_first, expected_second = list('--AGACTAGTTAC'), list('CGAGAC--G-T--')
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #5

0

Afficher le fichier

def test_sequences4():
    """Align 'AGACTAGTTACC' with 'CGAGACGTC' and verify both gapped sequences."""
    first, second = "AGACTAGTTACC", "CGAGACGTC"
    _score, table = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        table, first, second, gap='-')
    expected_first, expected_second = list("--AGACTAGTTACC"), list("CGAGAC--GT--C-")
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #6

0

Afficher le fichier

def test_sequences_custom():
    """Compare the default alignment with one using a custom substitution.

    The same pair of sequences is aligned twice: first with the default
    settings, then with a substitution function built from
    ``rsrc/substitution.txt`` next to this test file.
    """
    matrix_file = Path(__file__).parent / "rsrc" / "substitution.txt"
    substitution_fn = alignment.make_substitution_fn(
        read_substitution_matrix(matrix_file))

    first, second = "CCAGG", "CCGA"

    # Pass 1: default settings.
    score, _scores, trace = alignment.needleman_wunsch(first, second)
    _pairs, default_first, default_second = alignment.best_alignment(
        trace, first, second, gap='-')
    expected_default = [list("CCAGG"), list("CC-GA")]
    assert score == 1
    assert default_first == expected_default[0]
    assert default_second == expected_default[1]

    # Pass 2: custom substitution function.
    score, _scores, trace = alignment.needleman_wunsch(
        first, second, substitution=substitution_fn)
    _pairs, custom_first, custom_second = alignment.best_alignment(
        trace, first, second, gap='-')
    expected_custom = [list("CC-AGG"), list("CCGA--")]
    assert custom_first == expected_custom[0]
    assert custom_second == expected_custom[1]

Exemple #7

0

Afficher le fichier

def test_sequences5():
    """Align 'ATGAGT' with 'ATGGCGT'; verify the score and both gapped sequences."""
    first, second = "ATGAGT", "ATGGCGT"
    score, _scores, trace = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        trace, first, second, gap='-')
    expected_first, expected_second = list("AT-GAGT"), list("ATGGCGT")
    assert score == 3
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #8

0

Afficher le fichier

def test_sequences4():
    """Align 'AGACTAGTTACC' with 'CGAGACGTC'; verify score and gapped sequences."""
    first, second = "AGACTAGTTACC", "CGAGACGTC"
    score, _scores, trace = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        trace, first, second, gap='-')
    expected_first, expected_second = list("--AGACTAGTTACC"), list("CGAGAC--G-T--C")
    assert score == 0
    assert gapped_first == expected_first
    assert gapped_second == expected_second

Exemple #9

0

Afficher le fichier

def test_sequences1():
    """Example from https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm .

    Verifies the final score, the full matrix returned by
    ``needleman_wunsch`` and the two gapped sequences.
    """
    first, second = "GATTACA", "GCATGCU"
    score, table = alignment.needleman_wunsch(first, second)
    _pairs, gapped_first, gapped_second = alignment.best_alignment(
        table, first, second, gap='-')
    expected_table = [
        [-0., -1., -2., -3., -4., -5., -6., -7.],
        [-1., 1., -0., -1., -2., -3., -4., -5.],
        [-2., -0., -0., 1., -0., -1., -2., -3.],
        [-3., -1., -1., -0., 2., 1., -0., -1.],
        [-4., -2., -2., -1., 1., 1., -0., -1.],
        [-5., -3., -3., -1., -0., -0., -0., -1.],
        [-6., -4., -2., -2., -1., -1., 1., -0.],
        [-7., -5., -3., -1., -2., -2., -0., -0.],
    ]
    expected_first = ['G', '-', 'A', 'T', 'T', 'A', 'C', 'A']
    expected_second = ['G', 'C', 'A', 'T', '-', 'G', 'C', 'U']
    assert score == 0.0
    assert np.array_equal(table, expected_table)
    assert gapped_first == expected_first
    assert gapped_second == expected_second