def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
    # Globally align two Protein sequences with penalize_terminal_gaps=True
    # and check the returned MSA, score and start/end positions.
    seq1 = Protein("HEAGAWGHEE")
    seq2 = Protein("PAWHEAE")
    result = global_pairwise_align_protein(
        seq1, seq2,
        gap_open_penalty=10.,
        gap_extend_penalty=5.,
        penalize_terminal_gaps=True)
    msa, score, start_end = result

    exp_msa = TabularMSA([Protein("HEAGAWGHEE"), Protein("---PAWHEAE")])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 1.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])
def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
    # Globally align two protein strings with penalize_terminal_gaps=True
    # and check the aligned sequences, score, start/end positions and ids.
    exp_seq1, exp_seq2, exp_score = ("HEAGAWGHEE", "---PAWHEAE", 1.0)
    aln = global_pairwise_align_protein(
        "HEAGAWGHEE",
        "PAWHEAE",
        gap_open_penalty=10.,
        gap_extend_penalty=5.,
        penalize_terminal_gaps=True)

    self.assertEqual(str(aln[0]), exp_seq1)
    self.assertEqual(str(aln[1]), exp_seq2)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    # No ids are passed in; the result is expected to carry '0' and '1'.
    self.assertEqual(aln.ids(), list('01'))
def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
    # Globally align two protein strings with terminal gaps penalized, then
    # verify each aligned row, the score, the start/end positions and ids.
    options = dict(gap_open_penalty=10.,
                   gap_extend_penalty=5.,
                   penalize_terminal_gaps=True)
    result = global_pairwise_align_protein("HEAGAWGHEE", "PAWHEAE",
                                           **options)

    expected_rows = ("HEAGAWGHEE", "---PAWHEAE")
    expected_score = 1.0
    # Rows are compared in order, via their string form.
    for row, expected_row in enumerate(expected_rows):
        self.assertEqual(str(result[row]), expected_row)
    self.assertEqual(result.score(), expected_score)
    self.assertEqual(result.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(result.ids(), list('01'))
def test_global_pairwise_align_protein_invalid_dtype(self):
    # Passing a TabularMSA of DNA as the second argument must raise a
    # TypeError whose message matches the pattern below.
    message_pattern = "TabularMSA with Protein dtype.*dtype 'DNA'"
    with self.assertRaisesRegex(TypeError, message_pattern):
        # Inputs are built inside the context, as in the original test.
        global_pairwise_align_protein(
            TabularMSA([Protein('PAW')]),
            TabularMSA([DNA('ACGT')]))
def test_global_pairwise_align_protein(self):
    # Exercises global_pairwise_align_protein with Protein and TabularMSA
    # inputs, checking the returned MSA, score and start/end positions for
    # each case, and finally that non-sequence input raises TypeError.

    # Plain Protein inputs, gap open 10 / gap extend 5.
    msa, score, start_end = global_pairwise_align_protein(
        Protein("HEAGAWGHEE"),
        Protein("PAWHEAE"),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    exp_msa = TabularMSA([Protein("HEAGAWGHEE-"),
                          Protein("---PAW-HEAE")])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 23.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])

    # Same inputs, gap open 5 / gap extend 0.5.
    # EMBOSS result: P---AW-HEAE
    msa, score, start_end = global_pairwise_align_protein(
        Protein("HEAGAWGHEE"),
        Protein("PAWHEAE"),
        gap_open_penalty=5.,
        gap_extend_penalty=0.5)
    exp_msa = TabularMSA([Protein("HEAGAWGHE-E"),
                          Protein("---PAW-HEAE")])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 30.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])

    # Protein sequences with metadata
    msa, score, start_end = global_pairwise_align_protein(
        Protein("HEAGAWGHEE", metadata={'id': "s1"}),
        Protein("PAWHEAE", metadata={'id': "s2"}),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    exp_msa = TabularMSA([
        Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
        Protein("---PAW-HEAE", metadata={'id': "s2"})])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 23.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])

    # One TabularMSA and one Protein as input
    msa, score, start_end = global_pairwise_align_protein(
        TabularMSA([Protein("HEAGAWGHEE", metadata={'id': "s1"})]),
        Protein("PAWHEAE", metadata={'id': "s2"}),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    exp_msa = TabularMSA([
        Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
        Protein("---PAW-HEAE", metadata={'id': "s2"})])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 23.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])

    # One single-sequence alignment as input and one double-sequence
    # alignment as input. Score confirmed manually.
    msa, score, start_end = global_pairwise_align_protein(
        TabularMSA([Protein("HEAGAWGHEE", metadata={'id': "s1"}),
                    Protein("HDAGAWGHDE", metadata={'id': "s2"})]),
        TabularMSA([Protein("PAWHEAE", metadata={'id': "s3"})]),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    exp_msa = TabularMSA([
        Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
        Protein("HDAGAWGHDE-", metadata={'id': "s2"}),
        Protein("---PAW-HEAE", metadata={'id': "s3"})])
    self.assertEqual(msa, exp_msa)
    self.assertEqual(score, 21.0)
    self.assertEqual(start_end, [(0, 9), (0, 6)])

    # TypeError on invalid input. The valid Protein argument is built
    # outside the assertRaises context so only the call itself is guarded.
    valid_seq = Protein("HEAGAWGHEE")
    with self.assertRaises(TypeError):
        global_pairwise_align_protein(42, valid_seq)
    valid_seq = Protein("HEAGAWGHEE")
    with self.assertRaises(TypeError):
        global_pairwise_align_protein(valid_seq, 42)
def test_global_pairwise_align_protein(self):
    # Runs global_pairwise_align_protein on str, Protein and Alignment
    # inputs and checks the aligned sequences, score, start/end positions
    # and ids for each case; finally checks that non-sequence input raises
    # TypeError.

    # str inputs, gap open 10 / gap extend 5.
    exp_first, exp_second, exp_score = ("HEAGAWGHEE-", "---PAW-HEAE", 23.0)
    aln = global_pairwise_align_protein(
        "HEAGAWGHEE", "PAWHEAE",
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(str(aln[0]), exp_first)
    self.assertEqual(str(aln[1]), exp_second)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(aln.ids(), list('01'))

    # str inputs, gap open 5 / gap extend 0.5.
    exp_first, exp_second, exp_score = ("HEAGAWGHE-E", "---PAW-HEAE", 30.0)
    # EMBOSS result: P---AW-HEAE
    aln = global_pairwise_align_protein(
        "HEAGAWGHEE", "PAWHEAE",
        gap_open_penalty=5.,
        gap_extend_penalty=0.5)
    self.assertEqual(str(aln[0]), exp_first)
    self.assertEqual(str(aln[1]), exp_second)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(aln.ids(), list('01'))

    # Protein (rather than str) as input
    exp_first, exp_second, exp_score = ("HEAGAWGHEE-", "---PAW-HEAE", 23.0)
    aln = global_pairwise_align_protein(
        Protein("HEAGAWGHEE", "s1"),
        Protein("PAWHEAE", "s2"),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(str(aln[0]), exp_first)
    self.assertEqual(str(aln[1]), exp_second)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(aln.ids(), ["s1", "s2"])

    # One Alignment and one Protein as input
    exp_first, exp_second, exp_score = ("HEAGAWGHEE-", "---PAW-HEAE", 23.0)
    aln = global_pairwise_align_protein(
        Alignment([Protein("HEAGAWGHEE", "s1")]),
        Protein("PAWHEAE", "s2"),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(str(aln[0]), exp_first)
    self.assertEqual(str(aln[1]), exp_second)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(aln.ids(), ["s1", "s2"])

    # One single-sequence alignment as input and one double-sequence
    # alignment as input. Score confirmed manually.
    exp_first, exp_second, exp_third, exp_score = (
        "HEAGAWGHEE-", "HDAGAWGHDE-", "---PAW-HEAE", 21.0)
    aln = global_pairwise_align_protein(
        Alignment([Protein("HEAGAWGHEE", "s1"),
                   Protein("HDAGAWGHDE", "s2")]),
        Alignment([Protein("PAWHEAE", "s3")]),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(str(aln[0]), exp_first)
    self.assertEqual(str(aln[1]), exp_second)
    self.assertEqual(str(aln[2]), exp_third)
    self.assertEqual(aln.score(), exp_score)
    self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
    self.assertEqual(aln.ids(), ["s1", "s2", "s3"])

    # ids are provided if they're not passed in
    aln = global_pairwise_align_protein(
        Protein("HEAGAWGHEE"),
        Protein("PAWHEAE"),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(aln.ids(), list('01'))

    # TypeError on invalid input
    with self.assertRaises(TypeError):
        global_pairwise_align_protein(42, "HEAGAWGHEE")
    with self.assertRaises(TypeError):
        global_pairwise_align_protein("HEAGAWGHEE", 42)
def test_global_pairwise_align_protein(self):
    # Runs global_pairwise_align_protein on str, Protein (with 'id'
    # metadata) and Alignment inputs and checks the aligned sequences,
    # score, start/end positions and ids for each case; finally checks that
    # non-sequence input raises TypeError.

    def check(aln, exp_rows, exp_score, exp_ids):
        # Same assertion order for every case: rows (by string form),
        # then score, then start/end positions, then ids.
        for row, exp_row in enumerate(exp_rows):
            self.assertEqual(str(aln[row]), exp_row)
        self.assertEqual(aln.score(), exp_score)
        self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
        self.assertEqual(aln.ids(), exp_ids)

    # str inputs, gap open 10 / gap extend 5.
    result = global_pairwise_align_protein(
        "HEAGAWGHEE", "PAWHEAE",
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    check(result, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, list('01'))

    # str inputs, gap open 5 / gap extend 0.5.
    # EMBOSS result: P---AW-HEAE
    result = global_pairwise_align_protein(
        "HEAGAWGHEE", "PAWHEAE",
        gap_open_penalty=5.,
        gap_extend_penalty=0.5)
    check(result, ("HEAGAWGHE-E", "---PAW-HEAE"), 30.0, list('01'))

    # Protein (rather than str) as input
    result = global_pairwise_align_protein(
        Protein("HEAGAWGHEE", metadata={'id': "s1"}),
        Protein("PAWHEAE", metadata={'id': "s2"}),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    check(result, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, ["s1", "s2"])

    # One Alignment and one Protein as input
    result = global_pairwise_align_protein(
        Alignment([Protein("HEAGAWGHEE", metadata={'id': "s1"})]),
        Protein("PAWHEAE", metadata={'id': "s2"}),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    check(result, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, ["s1", "s2"])

    # One single-sequence alignment as input and one double-sequence
    # alignment as input. Score confirmed manually.
    result = global_pairwise_align_protein(
        Alignment([Protein("HEAGAWGHEE", metadata={'id': "s1"}),
                   Protein("HDAGAWGHDE", metadata={'id': "s2"})]),
        Alignment([Protein("PAWHEAE", metadata={'id': "s3"})]),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    check(result,
          ("HEAGAWGHEE-", "HDAGAWGHDE-", "---PAW-HEAE"),
          21.0,
          ["s1", "s2", "s3"])

    # ids are provided if they're not passed in
    result = global_pairwise_align_protein(
        Protein("HEAGAWGHEE"),
        Protein("PAWHEAE"),
        gap_open_penalty=10.,
        gap_extend_penalty=5.)
    self.assertEqual(result.ids(), list('01'))

    # TypeError on invalid input
    with self.assertRaises(TypeError):
        global_pairwise_align_protein(42, "HEAGAWGHEE")
    with self.assertRaises(TypeError):
        global_pairwise_align_protein("HEAGAWGHEE", 42)