def test_sequences3():
    """Align ``GAAAAAAAT`` with ``GAATA`` and check both gapped sequences.

    Uses the two-value return of ``needleman_wunsch`` (score, matrix); only
    the matrix is needed to reconstruct the alignment.
    """
    seq_a, seq_b = "GAAAAAAAT", "GAATA"
    _, matrix = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        matrix, seq_a, seq_b, gap='-')
    # Expected alignment, one character per list element.
    expected_a = list('GAA-AAAAAT')
    expected_b = list('GAATA-----')
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def test_sequences3():
    """Align ``GAAAAAAAT`` with ``GAATA``; check the score and both gapped sequences.

    Uses the three-value return of ``needleman_wunsch`` (score, scores, paths);
    the paths are what ``best_alignment`` consumes.
    """
    seq_a, seq_b = "GAAAAAAAT", "GAATA"
    score, _, paths = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')
    # Expected alignment, one character per list element.
    expected_a = list('GAAAAAAAT')
    expected_b = list('G---AATA-')
    assert score == -1
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def route_mismatch_factor(map_con, path_pred, path_grnd, window=None, dist_fn=None, keep_mismatches=False):
    # type: (BaseMap, List[int], List[int], Optional[int], Optional[Callable], bool) -> Tuple[float, int, int, float, Optional[List[Tuple[int, int]]], float, float]
    r"""Evaluation method from Newson and Krumm (2009).

    :math:`f = \frac{d_{-} + d_{+}}{d_0}`

    With :math:`d_{-}` the length that is erroneously subtracted,
    :math:`d_{+}` the length that is erroneously added, and :math:`d_0` the
    distance of the correct route.

    This function only supports connected states (thus not switching between
    states that are not connected, e.g. parallel roads).

    Also computes the Accuracy by Number (AN) and Accuracy by Length (AL)
    metrics from Zheng et al. (2009).

    The predicted and ground-truth paths are aligned with
    ``needleman_wunsch`` / ``best_alignment``; every aligned pair then
    contributes the length of its ground-truth element to :math:`d_0` and,
    when the two elements differ, the predicted length to :math:`d_{+}` and
    the ground-truth length to :math:`d_{-}`.

    :param map_con: Map whose ``path_dist`` method gives the length of a path
        element.
    :param path_pred: Predicted path.
    :param path_grnd: Ground-truth path.
    :param window: Passed through to ``needleman_wunsch``.
    :param dist_fn: Distance function, defaulting to ``dist_latlon.distance``.
        Accepted for interface compatibility; the lengths used here come from
        ``map_con.path_dist``, so this argument does not influence the result.
    :param keep_mismatches: If true, collect every mismatching
        ``(predicted, ground truth)`` pair.
    :return: Tuple ``(factor, cnt_matches, cnt_mismatches, d_zero, mismatches,
        an, al)``, where ``mismatches`` is ``None`` unless ``keep_mismatches``
        is set, ``an = cnt_matches / len(path_grnd)`` and
        ``al = (d_zero - d_min) / d_zero``.
    :raises ZeroDivisionError: If the accumulated ground-truth length is zero
        or ``path_grnd`` is empty.
    """
    if dist_fn is None:
        # Resolved so the default stays explicit; not used further below.
        dist_fn = dist_latlon.distance
    _, matrix = needleman_wunsch(path_pred, path_grnd, window=window)
    algn, _, _ = best_alignment(matrix)
    d_plus = 0  # length erroneously added
    d_min = 0  # length erroneously subtracted
    d_zero = 0  # length of correct route
    cnt_matches = 0  # number of perfect matches
    cnt_mismatches = 0
    mismatches = [] if keep_mismatches else None
    for pred_pi, grnd_pi in algn:
        pred_p = path_pred[pred_pi]
        grnd_p = path_grnd[grnd_pi]
        grnd_d = map_con.path_dist(grnd_p)
        d_zero += grnd_d
        if pred_p == grnd_p:
            cnt_matches += 1
        else:
            cnt_mismatches += 1
            pred_d = map_con.path_dist(pred_p)
            d_plus += pred_d
            d_min += grnd_d
            if keep_mismatches:
                mismatches.append((pred_p, grnd_p))
    factor = (d_min + d_plus) / d_zero
    an = cnt_matches / len(path_grnd)
    al = (d_zero - d_min) / d_zero
    return factor, cnt_matches, cnt_mismatches, d_zero, mismatches, an, al
def test_sequences_blosum():
    """Align two sequences with a substitution matrix loaded from ``rsrc``.

    The substitution function is built from ``rsrc/substitution.txt`` next to
    this test file and handed to ``needleman_wunsch``; the resulting gapped
    sequences are compared against the expected alignment.
    """
    matrix_file = Path(__file__).parent / "rsrc" / "substitution.txt"
    subst_scores = read_substitution_matrix(matrix_file)
    subst_fn = alignment.make_substitution_fn(subst_scores)

    seq_a, seq_b = "AGACTAGTTAC", "CGAGACGT"
    _, _, paths = alignment.needleman_wunsch(
        seq_a, seq_b, substitution=subst_fn)
    _, aligned_a, aligned_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')

    # Expected alignment, one character per list element.
    expected_a = list('--AGACTAGTTAC')
    expected_b = list('CGAGAC--G-T--')
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def test_sequences4():
    """Align ``AGACTAGTTACC`` with ``CGAGACGTC`` and check both gapped sequences.

    Uses the two-value return of ``needleman_wunsch`` (score, matrix).
    """
    seq_a, seq_b = "AGACTAGTTACC", "CGAGACGTC"
    _, matrix = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        matrix, seq_a, seq_b, gap='-')
    # Expected alignment, one character per list element.
    expected_a = list("--AGACTAGTTACC")
    expected_b = list("CGAGAC--GT--C-")
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def test_sequences_custom():
    """Compare the alignment of one sequence pair with and without a substitution function.

    The same pair is aligned twice: first with the default call, where the
    score is checked as well, and then with a substitution function built
    from ``rsrc/substitution.txt`` next to this test file.
    """
    matrix_file = Path(__file__).parent / "rsrc" / "substitution.txt"
    subst_scores = read_substitution_matrix(matrix_file)
    subst_fn = alignment.make_substitution_fn(subst_scores)
    seq_a, seq_b = "CCAGG", "CCGA"

    # First pass: no substitution function supplied.
    score, _, paths = alignment.needleman_wunsch(seq_a, seq_b)
    _, plain_a, plain_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')
    expected_plain_a = list("CCAGG")
    expected_plain_b = list("CC-GA")
    assert score == 1
    assert plain_a == expected_plain_a
    assert plain_b == expected_plain_b

    # Second pass: same sequences, scored through the substitution function.
    _, _, paths = alignment.needleman_wunsch(
        seq_a, seq_b, substitution=subst_fn)
    _, custom_a, custom_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')
    expected_custom_a = list("CC-AGG")
    expected_custom_b = list("CCGA--")
    assert custom_a == expected_custom_a
    assert custom_b == expected_custom_b
def test_sequences5():
    """Align ``ATGAGT`` with ``ATGGCGT``; check the score and both gapped sequences."""
    seq_a, seq_b = "ATGAGT", "ATGGCGT"
    score, _, paths = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')
    # Expected alignment, one character per list element.
    expected_a = list("AT-GAGT")
    expected_b = list("ATGGCGT")
    assert score == 3
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def test_sequences4():
    """Align ``AGACTAGTTACC`` with ``CGAGACGTC``; check the score and both gapped sequences.

    Uses the three-value return of ``needleman_wunsch`` (score, scores, paths).
    """
    seq_a, seq_b = "AGACTAGTTACC", "CGAGACGTC"
    score, _, paths = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        paths, seq_a, seq_b, gap='-')
    # Expected alignment, one character per list element.
    expected_a = list("--AGACTAGTTACC")
    expected_b = list("CGAGAC--G-T--C")
    assert score == 0
    assert aligned_a == expected_a
    assert aligned_b == expected_b
def test_sequences1():
    """Check score, full matrix and alignment for ``GATTACA`` vs ``GCATGCU``.

    Example from https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm .
    """
    seq_a, seq_b = "GATTACA", "GCATGCU"
    score, matrix = alignment.needleman_wunsch(seq_a, seq_b)
    _, aligned_a, aligned_b = alignment.best_alignment(
        matrix, seq_a, seq_b, gap='-')

    # Expected matrix, compared element-wise against the one returned above.
    expected_matrix = [
        [-0., -1., -2., -3., -4., -5., -6., -7.],
        [-1., 1., -0., -1., -2., -3., -4., -5.],
        [-2., -0., -0., 1., -0., -1., -2., -3.],
        [-3., -1., -1., -0., 2., 1., -0., -1.],
        [-4., -2., -2., -1., 1., 1., -0., -1.],
        [-5., -3., -3., -1., -0., -0., -0., -1.],
        [-6., -4., -2., -2., -1., -1., 1., -0.],
        [-7., -5., -3., -1., -2., -2., -0., -0.],
    ]
    expected_a = ['G', '-', 'A', 'T', 'T', 'A', 'C', 'A']
    expected_b = ['G', 'C', 'A', 'T', '-', 'G', 'C', 'U']

    assert score == 0.0
    assert np.array_equal(matrix, expected_matrix)
    assert aligned_a == expected_a
    assert aligned_b == expected_b