def test_compute_score_and_traceback_matrices(self):
    """Check the score and traceback matrices for two small alignments.

    Each case passes a three-character sequence and 'ACGT' to
    ``_compute_score_and_traceback_matrices`` together with the
    positional values 5 and 2 (presumably the gap open and gap extend
    penalties -- confirm against the function signature) and a
    substitution matrix built with match score 2 and mismatch score -1.
    """
    # The expected matrices below were computed manually.
    cases = [
        # first sequence is a prefix of the second
        ('ACG',
         [[0, -5, -7, -9],
          [-5, 2, -3, -5],
          [-7, -3, 4, -1],
          [-9, -5, -1, 6],
          [-11, -7, -3, 1]],
         [[0, 3, 3, 3],
          [2, 1, 3, 3],
          [2, 2, 1, 3],
          [2, 2, 2, 1],
          [2, 2, 2, 2]]),
        # different sequences: the last character no longer matches
        ('ACC',
         [[0, -5, -7, -9],
          [-5, 2, -3, -5],
          [-7, -3, 4, -1],
          [-9, -5, -1, 3],
          [-11, -7, -3, -2]],
         [[0, 3, 3, 3],
          [2, 1, 3, 3],
          [2, 2, 1, 3],
          [2, 2, 2, 1],
          [2, 2, 2, 1]]),
    ]

    for first_seq, score_expected, tback_expected in cases:
        sub_matrix = _make_nt_substitution_matrix(2, -1)
        score_observed, tback_observed = \
            _compute_score_and_traceback_matrices(
                first_seq, 'ACGT', 5, 2, sub_matrix)
        np.testing.assert_array_equal(score_observed, score_expected)
        np.testing.assert_array_equal(tback_observed, tback_expected)
def test_make_nt_substitution_matrix(self):
    """Check the nested dict built by ``_make_nt_substitution_matrix``.

    The result is expected to map each of A, C, G and T to a dict over
    the same four characters, holding the first argument where the two
    characters are identical and the second argument everywhere else.
    """
    cases = [
        (1, -2,
         {'A': {'A': 1, 'C': -2, 'G': -2, 'T': -2},
          'C': {'A': -2, 'C': 1, 'G': -2, 'T': -2},
          'G': {'A': -2, 'C': -2, 'G': 1, 'T': -2},
          'T': {'A': -2, 'C': -2, 'G': -2, 'T': 1}}),
        (5, -4,
         {'A': {'A': 5, 'C': -4, 'G': -4, 'T': -4},
          'C': {'A': -4, 'C': 5, 'G': -4, 'T': -4},
          'G': {'A': -4, 'C': -4, 'G': 5, 'T': -4},
          'T': {'A': -4, 'C': -4, 'G': -4, 'T': 5}}),
    ]

    for match_score, mismatch_score, wanted in cases:
        observed = _make_nt_substitution_matrix(match_score, mismatch_score)
        self.assertEqual(observed, wanted)
def test_nucleotide_aligners_use_substitution_matrices(self):
    """Check that both nucleotide aligners honor ``substitution_matrix``.

    For the local and for the global aligner, the same pair of sequences
    is aligned twice with identical gap penalties and match/mismatch
    scores: once without and once with an explicit substitution matrix
    (match 10, mismatch -10). The two results must differ in both
    aligned sequences and in the score.
    """
    alt_sub = _make_nt_substitution_matrix(10, -10)

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with local alignment
    with warnings.catch_warnings():
        # silence any warnings emitted by the aligners
        warnings.simplefilter("ignore")
        actual_no_sub = local_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC",
            gap_open_penalty=10., gap_extend_penalty=5.,
            match_score=5, mismatch_score=-4)
        actual_alt_sub = local_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC",
            gap_open_penalty=10., gap_extend_penalty=5.,
            match_score=5, mismatch_score=-4,
            substitution_matrix=alt_sub)
    self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
    self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with global alignment
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # The baseline must come from the global aligner as well;
        # comparing a local baseline against a global result would differ
        # regardless of whether the substitution matrix is used.
        actual_no_sub = global_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC",
            gap_open_penalty=10., gap_extend_penalty=5.,
            match_score=5, mismatch_score=-4)
        actual_alt_sub = global_pairwise_align_nucleotide(
            "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC",
            gap_open_penalty=10., gap_extend_penalty=5.,
            match_score=5, mismatch_score=-4,
            substitution_matrix=alt_sub)
    self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
    self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())
def test_compute_score_and_traceback_matrices_invalid(self):
    """Check that a character missing from the matrix raises ValueError.

    If a sequence contains a character that is not in the substitution
    matrix, an informative error should be raised.
    """
    sub_matrix = _make_nt_substitution_matrix(2, -1)
    # 'W' in the first sequence is the character the test treats as
    # absent from the nucleotide substitution matrix.
    with self.assertRaises(ValueError):
        _compute_score_and_traceback_matrices(
            'AWG', 'ACGT', 5, 2, sub_matrix)