def test_nucleotide_aligners_use_substitution_matrices(self):
    """Check that both nucleotide aligners honour ``substitution_matrix``.

    The same pair of sequences is aligned twice with each aligner: once
    scored only with ``match_score``/``mismatch_score`` and once with an
    alternate identity substitution matrix. The alignment and the score are
    expected to change when the matrix is supplied.
    """
    seq1 = "GACCTTGACCAGGTACC"
    seq2 = "GAACTTTGACGTAAC"
    scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                   match_score=5, mismatch_score=-4)
    alt_sub = make_identity_substitution_matrix(10, -10)

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with local alignment
    actual_no_sub = local_pairwise_align_nucleotide(seq1, seq2, **scoring)
    actual_alt_sub = local_pairwise_align_nucleotide(
        seq1, seq2, substitution_matrix=alt_sub, **scoring)
    self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
    self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())

    # alternate substitution matrix yields different alignment and score
    # with global alignment. Both runs go through the global aligner so that
    # the substitution matrix is the only difference between them; using the
    # local aligner for the baseline would make the check pass trivially.
    actual_no_sub = global_pairwise_align_nucleotide(seq1, seq2, **scoring)
    actual_alt_sub = global_pairwise_align_nucleotide(
        seq1, seq2, substitution_matrix=alt_sub, **scoring)
    # Compare the alignment as a whole: a different alignment only
    # guarantees that at least one of the two rows changed.
    self.assertNotEqual(
        (str(actual_no_sub[0]), str(actual_no_sub[1])),
        (str(actual_alt_sub[0]), str(actual_alt_sub[1])))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())
def test_nucleotide_aligners_use_substitution_matrices(self):
    """Check that both nucleotide aligners honour ``substitution_matrix``.

    The same pair of sequences is aligned twice with each aligner: once
    scored only with ``match_score``/``mismatch_score`` and once with an
    alternate identity substitution matrix. The alignment and the score are
    expected to change when the matrix is supplied. Warnings raised while
    aligning are silenced.
    """
    seq1 = "GACCTTGACCAGGTACC"
    seq2 = "GAACTTTGACGTAAC"
    scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                   match_score=5, mismatch_score=-4)
    alt_sub = make_identity_substitution_matrix(10, -10)

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with local alignment
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual_no_sub = local_pairwise_align_nucleotide(
            seq1, seq2, **scoring)
        actual_alt_sub = local_pairwise_align_nucleotide(
            seq1, seq2, substitution_matrix=alt_sub, **scoring)
    self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
    self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())

    # alternate substitution matrix yields different alignment and score
    # with global alignment. Both runs go through the global aligner so that
    # the substitution matrix is the only difference between them; using the
    # local aligner for the baseline would make the check pass trivially.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual_no_sub = global_pairwise_align_nucleotide(
            seq1, seq2, **scoring)
        actual_alt_sub = global_pairwise_align_nucleotide(
            seq1, seq2, substitution_matrix=alt_sub, **scoring)
    # Compare the alignment as a whole: a different alignment only
    # guarantees that at least one of the two rows changed.
    self.assertNotEqual(
        (str(actual_no_sub[0]), str(actual_no_sub[1])),
        (str(actual_alt_sub[0]), str(actual_alt_sub[1])))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())
def test_local_pairwise_align_nucleotide(self):
    """Local nucleotide alignment of str and DNA inputs.

    Checks the aligned rows, score, start/end positions and ids for two
    gap-penalty settings, then checks that ``Alignment`` objects and
    non-sequence values are rejected with ``TypeError``.
    """
    seq1 = "GACCTTGACCAGGTACC"
    seq2 = "GAACTTTGACGTAAC"
    costly_gaps = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                       match_score=5, mismatch_score=-4)

    # lower gap penalties: the expected alignment contains a 3-position gap
    result = local_pairwise_align_nucleotide(
        seq1, seq2, gap_open_penalty=5., gap_extend_penalty=0.5,
        match_score=5, mismatch_score=-4)
    self.assertEqual(str(result[0]), "ACCTTGACCAGGTACC")
    self.assertEqual(str(result[1]), "ACTTTGAC---GTAAC")
    self.assertEqual(result.score(), 41.0)
    self.assertEqual(result.start_end_positions(), [(1, 16), (2, 14)])
    self.assertEqual(result.ids(), ['0', '1'])

    # higher gap penalties: the expected alignment is shorter and ungapped
    result = local_pairwise_align_nucleotide(seq1, seq2, **costly_gaps)
    self.assertEqual(str(result[0]), "ACCTTGAC")
    self.assertEqual(str(result[1]), "ACTTTGAC")
    self.assertEqual(result.score(), 31.0)
    self.assertEqual(result.start_end_positions(), [(1, 8), (2, 9)])
    self.assertEqual(result.ids(), ['0', '1'])

    # DNA (rather than str) as input
    result = local_pairwise_align_nucleotide(
        DNA(seq1, "s1"), DNA(seq2, "s2"), **costly_gaps)
    self.assertEqual(str(result[0]), "ACCTTGAC")
    self.assertEqual(str(result[1]), "ACTTTGAC")
    self.assertEqual(result.score(), 31.0)
    self.assertEqual(result.start_end_positions(), [(1, 8), (2, 9)])
    self.assertEqual(result.ids(), ["s1", "s2"])

    # Fails when either input is passed as an Alignment. The arguments are
    # built outside the ``with`` block so that only the aligner call itself
    # may satisfy the expected TypeError.
    first = Alignment([DNA(seq1, "s1")])
    second = DNA(seq2, "s2")
    with self.assertRaises(TypeError):
        local_pairwise_align_nucleotide(first, second, **costly_gaps)

    first = DNA(seq1, "s1")
    second = Alignment([DNA(seq2, "s2")])
    with self.assertRaises(TypeError):
        local_pairwise_align_nucleotide(first, second, **costly_gaps)

    # ids are provided if they're not passed in
    result = local_pairwise_align_nucleotide(
        DNA(seq1), DNA(seq2), **costly_gaps)
    self.assertEqual(result.ids(), ['0', '1'])

    # TypeError on invalid input
    for bad_args in ((42, "HEAGAWGHEE"), ("HEAGAWGHEE", 42)):
        self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                          *bad_args)
def test_local_pairwise_align_nucleotide(self):
    """Local nucleotide alignment of DNA inputs.

    The aligner is expected to return a ``(msa, score, start_end)`` triple.
    Two gap-penalty settings are checked, then propagation of sequence
    metadata into the result, then rejection of ``TabularMSA`` and
    non-sequence inputs with ``TypeError``.
    """
    seq1 = "GACCTTGACCAGGTACC"
    seq2 = "GAACTTTGACGTAAC"
    costly_gaps = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                       match_score=5, mismatch_score=-4)

    # lower gap penalties: the expected alignment contains a 3-position gap
    msa, score, positions = local_pairwise_align_nucleotide(
        DNA(seq1), DNA(seq2), gap_open_penalty=5., gap_extend_penalty=0.5,
        match_score=5, mismatch_score=-4)
    self.assertEqual(
        msa, TabularMSA([DNA("ACCTTGACCAGGTACC"), DNA("ACTTTGAC---GTAAC")]))
    self.assertEqual(score, 41.0)
    self.assertEqual(positions, [(1, 16), (2, 14)])

    # higher gap penalties: the expected alignment is shorter and ungapped
    msa, score, positions = local_pairwise_align_nucleotide(
        DNA(seq1), DNA(seq2), **costly_gaps)
    self.assertEqual(msa, TabularMSA([DNA("ACCTTGAC"), DNA("ACTTTGAC")]))
    self.assertEqual(score, 31.0)
    self.assertEqual(positions, [(1, 8), (2, 9)])

    # DNA sequences with metadata
    msa, score, positions = local_pairwise_align_nucleotide(
        DNA(seq1, metadata={'id': "s1"}),
        DNA(seq2, metadata={'id': "s2"}),
        **costly_gaps)
    expected_msa = TabularMSA([DNA("ACCTTGAC", metadata={'id': "s1"}),
                               DNA("ACTTTGAC", metadata={'id': "s2"})])
    self.assertEqual(msa, expected_msa)
    self.assertEqual(score, 31.0)
    self.assertEqual(positions, [(1, 8), (2, 9)])

    # Fails when either input is passed as a TabularMSA. The arguments are
    # built outside the ``with`` block so that only the aligner call itself
    # may satisfy the expected TypeError.
    first = TabularMSA([DNA(seq1, metadata={'id': "s1"})])
    second = DNA(seq2, metadata={'id': "s2"})
    with self.assertRaises(TypeError):
        local_pairwise_align_nucleotide(first, second, **costly_gaps)

    first = DNA(seq1, metadata={'id': "s1"})
    second = TabularMSA([DNA(seq2, metadata={'id': "s2"})])
    with self.assertRaises(TypeError):
        local_pairwise_align_nucleotide(first, second, **costly_gaps)

    # TypeError on invalid input
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      42, DNA("ACGT"))
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      DNA("ACGT"), 42)
def test_nucleotide_aligners_use_substitution_matrices(self):
    """Check that both nucleotide aligners honour ``substitution_matrix``.

    Each aligner is run with and without an alternate identity substitution
    matrix on the same pair of sequences. The alignment and score must
    differ in both cases. The start/end positions must differ for the local
    aligner and be equal for the global aligner.
    """
    alt_sub = make_identity_substitution_matrix(10, -10)
    scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                   match_score=5, mismatch_score=-4)

    def align_with_and_without_matrix(aligner):
        # Fresh DNA objects are built for every call, baseline run first.
        baseline = aligner(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"), **scoring)
        alternate = aligner(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            substitution_matrix=alt_sub, **scoring)
        return baseline, alternate

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with local alignment
    baseline, alternate = align_with_and_without_matrix(
        local_pairwise_align_nucleotide)
    base_msa, base_score, base_positions = baseline
    alt_msa, alt_score, alt_positions = alternate
    self.assertNotEqual(base_msa, alt_msa)
    self.assertNotEqual(base_score, alt_score)
    self.assertNotEqual(base_positions, alt_positions)

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with global alignment
    baseline, alternate = align_with_and_without_matrix(
        global_pairwise_align_nucleotide)
    base_msa, base_score, base_positions = baseline
    alt_msa, alt_score, alt_positions = alternate
    self.assertNotEqual(base_msa, alt_msa)
    self.assertNotEqual(base_score, alt_score)
    # unlike the local case, the reported positions are expected to match
    self.assertEqual(base_positions, alt_positions)
def dnaAlign(seq1, seq2, gap_open_penalty, gap_extend_penalty, local=False):
    """Pairwise-align two nucleotide strings and summarise the result.

    Both inputs are upper-cased, wrapped in ``DNA`` and handed to
    ``local_pairwise_align_nucleotide`` when ``local`` is truthy, otherwise
    to ``global_pairwise_align_nucleotide``. The gap penalties are passed
    positionally as the third and fourth arguments.

    Returns a dict with:
      * ``aln1`` / ``aln2`` -- the two aligned rows converted to ``str``
      * ``score`` -- the score returned by the aligner
      * ``similarity`` -- relative match frequency between the two rows,
        multiplied by 100 and rounded to two decimals via string formatting
    """
    query = DNA(seq1.upper())
    target = DNA(seq2.upper())

    if local:
        aligner = local_pairwise_align_nucleotide
    else:
        aligner = global_pairwise_align_nucleotide
    # the third element of the aligner's result is not used here
    alignment, score, _ = aligner(
        query, target, gap_open_penalty, gap_extend_penalty)

    row1 = alignment[0]
    row2 = alignment[1]
    aln1_text = str(row1)
    aln2_text = str(row2)
    percent_identity = row1.match_frequency(row2, relative=True) * 100

    return {
        'aln1': aln1_text,
        'aln2': aln2_text,
        'score': score,
        'similarity': float(format(percent_identity, '.2f')),
    }
def test_local_pairwise_align_nucleotide(self):
    """Local nucleotide alignment of str and DNA inputs.

    Verifies aligned rows, score, start/end positions and ids for two
    gap-penalty settings, then verifies that ``Alignment`` objects and
    non-sequence values raise ``TypeError``.
    """
    seq_a = "GACCTTGACCAGGTACC"
    seq_b = "GAACTTTGACGTAAC"
    high_gap_cost = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                         match_score=5, mismatch_score=-4)

    def check(aln, row_a, row_b, score, positions, ids):
        # Same assertions, in the same order, for every successful case.
        self.assertEqual(str(aln[0]), row_a)
        self.assertEqual(str(aln[1]), row_b)
        self.assertEqual(aln.score(), score)
        self.assertEqual(aln.start_end_positions(), positions)
        self.assertEqual(aln.ids(), ids)

    # lower gap penalties: the expected alignment contains a 3-position gap
    check(local_pairwise_align_nucleotide(
              seq_a, seq_b, gap_open_penalty=5., gap_extend_penalty=0.5,
              match_score=5, mismatch_score=-4),
          "ACCTTGACCAGGTACC", "ACTTTGAC---GTAAC", 41.0,
          [(1, 16), (2, 14)], list('01'))

    # higher gap penalties: the expected alignment is shorter and ungapped
    check(local_pairwise_align_nucleotide(seq_a, seq_b, **high_gap_cost),
          "ACCTTGAC", "ACTTTGAC", 31.0, [(1, 8), (2, 9)], list('01'))

    # DNA (rather than str) as input
    check(local_pairwise_align_nucleotide(
              DNA(seq_a, "s1"), DNA(seq_b, "s2"), **high_gap_cost),
          "ACCTTGAC", "ACTTTGAC", 31.0, [(1, 8), (2, 9)], ["s1", "s2"])

    # Fails when either input is passed as an Alignment
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      Alignment([DNA(seq_a, "s1")]), DNA(seq_b, "s2"),
                      **high_gap_cost)
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      DNA(seq_a, "s1"), Alignment([DNA(seq_b, "s2")]),
                      **high_gap_cost)

    # ids are provided if they're not passed in
    unnamed = local_pairwise_align_nucleotide(
        DNA(seq_a), DNA(seq_b), **high_gap_cost)
    self.assertEqual(unnamed.ids(), list('01'))

    # TypeError on invalid input
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      42, "HEAGAWGHEE")
    self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                      "HEAGAWGHEE", 42)