예제 #1
0
    def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
        # Align two Protein sequences with terminal gaps penalized, then
        # check the returned alignment, score and start/end positions.
        result = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"),
            Protein("PAWHEAE"),
            gap_open_penalty=10.0,
            gap_extend_penalty=5.0,
            penalize_terminal_gaps=True)
        msa, score, start_end = result

        # The shorter sequence is expected to carry three leading gaps and
        # no internal or trailing ones.
        expected_rows = [Protein("HEAGAWGHEE"), Protein("---PAWHEAE")]
        self.assertEqual(msa, TabularMSA(expected_rows))
        self.assertEqual(score, 1.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])
예제 #2
0
 def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
     # Align two plain strings with terminal gaps penalized; the aligned
     # rows, score, start/end positions and ids are all checked.
     aln = global_pairwise_align_protein(
         "HEAGAWGHEE",
         "PAWHEAE",
         gap_open_penalty=10.0,
         gap_extend_penalty=5.0,
         penalize_terminal_gaps=True)

     # Rows are compared by position, in order.
     exp_rows = ("HEAGAWGHEE", "---PAWHEAE")
     for idx, exp_row in enumerate(exp_rows):
         self.assertEqual(str(aln[idx]), exp_row)

     self.assertEqual(aln.score(), 1.0)
     self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
     # No ids were passed in, and the result is expected to report '0', '1'.
     self.assertEqual(aln.ids(), ['0', '1'])
예제 #3
0
 def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
     # Keyword options for the single alignment performed by this test.
     options = {
         'gap_open_penalty': 10.,
         'gap_extend_penalty': 5.,
         'penalize_terminal_gaps': True,
     }
     aligned = global_pairwise_align_protein("HEAGAWGHEE", "PAWHEAE",
                                             **options)

     # Each row is converted to str and checked before the next is read.
     first_row = str(aligned[0])
     self.assertEqual(first_row, "HEAGAWGHEE")
     second_row = str(aligned[1])
     self.assertEqual(second_row, "---PAWHEAE")

     self.assertEqual(aligned.score(), 1.0)
     self.assertEqual(aligned.start_end_positions(), [(0, 9), (0, 6)])
     self.assertEqual(aligned.ids(), ['0', '1'])
예제 #4
0
 def test_global_pairwise_align_protein_invalid_dtype(self):
     # Mixing a Protein TabularMSA with a DNA TabularMSA is expected to
     # raise a TypeError whose message matches this pattern.
     message_pattern = "TabularMSA with Protein dtype.*dtype 'DNA'"
     with self.assertRaisesRegex(TypeError, message_pattern):
         protein_msa = TabularMSA([Protein('PAW')])
         dna_msa = TabularMSA([DNA('ACGT')])
         global_pairwise_align_protein(protein_msa, dna_msa)
예제 #5
0
    def test_global_pairwise_align_protein(self):
        # Start/end positions asserted for every alignment in this test.
        exp_start_end = [(0, 9), (0, 6)]
        # Gap penalties used by every call except the second one.
        penalties = {'gap_open_penalty': 10., 'gap_extend_penalty': 5.}

        # Bare Protein sequences as input
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"), **penalties)
        expected = TabularMSA([Protein("HEAGAWGHEE-"),
                               Protein("---PAW-HEAE")])
        self.assertEqual(msa, expected)
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, exp_start_end)

        # Smaller gap penalties.
        # EMBOSS result: P---AW-HEAE
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=5., gap_extend_penalty=0.5)
        expected = TabularMSA([Protein("HEAGAWGHE-E"),
                               Protein("---PAW-HEAE")])
        self.assertEqual(msa, expected)
        self.assertEqual(score, 30.0)
        self.assertEqual(start_end, exp_start_end)

        # Protein sequences with metadata
        first = Protein("HEAGAWGHEE", metadata={'id': "s1"})
        second = Protein("PAWHEAE", metadata={'id': "s2"})
        msa, score, start_end = global_pairwise_align_protein(
            first, second, **penalties)
        expected = TabularMSA([
            Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
            Protein("---PAW-HEAE", metadata={'id': "s2"}),
        ])
        self.assertEqual(msa, expected)
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, exp_start_end)

        # One TabularMSA and one Protein as input
        first = TabularMSA([Protein("HEAGAWGHEE", metadata={'id': "s1"})])
        second = Protein("PAWHEAE", metadata={'id': "s2"})
        msa, score, start_end = global_pairwise_align_protein(
            first, second, **penalties)
        expected = TabularMSA([
            Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
            Protein("---PAW-HEAE", metadata={'id': "s2"}),
        ])
        self.assertEqual(msa, expected)
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, exp_start_end)

        # One single-sequence alignment as input and one double-sequence
        # alignment as input. Score confirmed manually.
        first = TabularMSA([
            Protein("HEAGAWGHEE", metadata={'id': "s1"}),
            Protein("HDAGAWGHDE", metadata={'id': "s2"}),
        ])
        second = TabularMSA([Protein("PAWHEAE", metadata={'id': "s3"})])
        msa, score, start_end = global_pairwise_align_protein(
            first, second, **penalties)
        expected = TabularMSA([
            Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
            Protein("HDAGAWGHDE-", metadata={'id': "s2"}),
            Protein("---PAW-HEAE", metadata={'id': "s3"}),
        ])
        self.assertEqual(msa, expected)
        self.assertEqual(score, 21.0)
        self.assertEqual(start_end, exp_start_end)

        # TypeError on invalid input. The valid argument is built before
        # entering the context so only the alignment call is guarded.
        valid_seq = Protein("HEAGAWGHEE")
        with self.assertRaises(TypeError):
            global_pairwise_align_protein(42, valid_seq)
        valid_seq = Protein("HEAGAWGHEE")
        with self.assertRaises(TypeError):
            global_pairwise_align_protein(valid_seq, 42)
예제 #6
0
    def test_global_pairwise_align_protein(self):
        def check(aln, rows, score, ids):
            # Compare each aligned row (as a string), then the score, the
            # start/end positions and the sequence ids, in that order.
            for idx, row in enumerate(rows):
                self.assertEqual(str(aln[idx]), row)
            self.assertEqual(aln.score(), score)
            self.assertEqual(aln.start_end_positions(), [(0, 9), (0, 6)])
            self.assertEqual(aln.ids(), ids)

        # str as input
        aln = global_pairwise_align_protein(
            "HEAGAWGHEE", "PAWHEAE",
            gap_open_penalty=10., gap_extend_penalty=5.)
        check(aln, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, ['0', '1'])

        # Smaller gap penalties.
        # EMBOSS result: P---AW-HEAE
        aln = global_pairwise_align_protein(
            "HEAGAWGHEE", "PAWHEAE",
            gap_open_penalty=5., gap_extend_penalty=0.5)
        check(aln, ("HEAGAWGHE-E", "---PAW-HEAE"), 30.0, ['0', '1'])

        # Protein (rather than str) as input
        aln = global_pairwise_align_protein(
            Protein("HEAGAWGHEE", "s1"), Protein("PAWHEAE", "s2"),
            gap_open_penalty=10., gap_extend_penalty=5.)
        check(aln, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, ["s1", "s2"])

        # One Alignment and one Protein as input
        aln = global_pairwise_align_protein(
            Alignment([Protein("HEAGAWGHEE", "s1")]),
            Protein("PAWHEAE", "s2"),
            gap_open_penalty=10., gap_extend_penalty=5.)
        check(aln, ("HEAGAWGHEE-", "---PAW-HEAE"), 23.0, ["s1", "s2"])

        # One single-sequence alignment as input and one double-sequence
        # alignment as input. Score confirmed manually.
        aln = global_pairwise_align_protein(
            Alignment([Protein("HEAGAWGHEE", "s1"),
                       Protein("HDAGAWGHDE", "s2")]),
            Alignment([Protein("PAWHEAE", "s3")]),
            gap_open_penalty=10., gap_extend_penalty=5.)
        check(aln, ("HEAGAWGHEE-", "HDAGAWGHDE-", "---PAW-HEAE"), 21.0,
              ["s1", "s2", "s3"])

        # ids are provided if they're not passed in
        aln = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=10., gap_extend_penalty=5.)
        self.assertEqual(aln.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            global_pairwise_align_protein(42, "HEAGAWGHEE")
        with self.assertRaises(TypeError):
            global_pairwise_align_protein("HEAGAWGHEE", 42)
예제 #7
0
    def test_global_pairwise_align_protein(self):
        # Start/end positions asserted for every alignment in this test.
        positions = [(0, 9), (0, 6)]
        # Gap penalties used by every call except the second one.
        penalties = dict(gap_open_penalty=10., gap_extend_penalty=5.)

        # str as input
        result = global_pairwise_align_protein("HEAGAWGHEE", "PAWHEAE",
                                               **penalties)
        self.assertEqual(str(result[0]), "HEAGAWGHEE-")
        self.assertEqual(str(result[1]), "---PAW-HEAE")
        self.assertEqual(result.score(), 23.0)
        self.assertEqual(result.start_end_positions(), positions)
        self.assertEqual(result.ids(), ['0', '1'])

        # Smaller gap penalties.
        # EMBOSS result: P---AW-HEAE
        result = global_pairwise_align_protein("HEAGAWGHEE", "PAWHEAE",
                                               gap_open_penalty=5.,
                                               gap_extend_penalty=0.5)
        self.assertEqual(str(result[0]), "HEAGAWGHE-E")
        self.assertEqual(str(result[1]), "---PAW-HEAE")
        self.assertEqual(result.score(), 30.0)
        self.assertEqual(result.start_end_positions(), positions)
        self.assertEqual(result.ids(), ['0', '1'])

        # Protein (rather than str) as input
        first = Protein("HEAGAWGHEE", metadata={'id': "s1"})
        second = Protein("PAWHEAE", metadata={'id': "s2"})
        result = global_pairwise_align_protein(first, second, **penalties)
        self.assertEqual(str(result[0]), "HEAGAWGHEE-")
        self.assertEqual(str(result[1]), "---PAW-HEAE")
        self.assertEqual(result.score(), 23.0)
        self.assertEqual(result.start_end_positions(), positions)
        self.assertEqual(result.ids(), ["s1", "s2"])

        # One Alignment and one Protein as input
        first = Alignment([Protein("HEAGAWGHEE", metadata={'id': "s1"})])
        second = Protein("PAWHEAE", metadata={'id': "s2"})
        result = global_pairwise_align_protein(first, second, **penalties)
        self.assertEqual(str(result[0]), "HEAGAWGHEE-")
        self.assertEqual(str(result[1]), "---PAW-HEAE")
        self.assertEqual(result.score(), 23.0)
        self.assertEqual(result.start_end_positions(), positions)
        self.assertEqual(result.ids(), ["s1", "s2"])

        # One single-sequence alignment as input and one double-sequence
        # alignment as input. Score confirmed manually.
        first = Alignment([Protein("HEAGAWGHEE", metadata={'id': "s1"}),
                           Protein("HDAGAWGHDE", metadata={'id': "s2"})])
        second = Alignment([Protein("PAWHEAE", metadata={'id': "s3"})])
        result = global_pairwise_align_protein(first, second, **penalties)
        self.assertEqual(str(result[0]), "HEAGAWGHEE-")
        self.assertEqual(str(result[1]), "HDAGAWGHDE-")
        self.assertEqual(str(result[2]), "---PAW-HEAE")
        self.assertEqual(result.score(), 21.0)
        self.assertEqual(result.start_end_positions(), positions)
        self.assertEqual(result.ids(), ["s1", "s2", "s3"])

        # ids are provided if they're not passed in
        first = Protein("HEAGAWGHEE")
        second = Protein("PAWHEAE")
        result = global_pairwise_align_protein(first, second, **penalties)
        self.assertEqual(result.ids(), ['0', '1'])

        # TypeError on invalid input
        with self.assertRaises(TypeError):
            global_pairwise_align_protein(42, "HEAGAWGHEE")
        with self.assertRaises(TypeError):
            global_pairwise_align_protein("HEAGAWGHEE", 42)