Exemple #1
0
    def test_character_detection(self):
        """Each of two near-matching subtitles should get its script line's character."""
        subtitle_texts = ["Hello sir", "my name is micha"]
        subtitles = [Subtitle(0, 0, text) for text in subtitle_texts]

        # (character, text) pairs; the script text deliberately differs
        # slightly from the subtitle text above.
        script_lines = [
            ("CHAR0", "Hallo Mr"),
            ("CHAR1", "my name is michi"),
        ]
        script_entities = [
            ScriptEntity(**{"character": who, "text": said, "type": "speech"})
            for who, said in script_lines
        ]

        alignment = self._sut(script_entities, subtitles, logger=self._logger)
        nw.pretty_print_grid(alignment)

        aligned = alignment.subtitles
        assert len(aligned) == 2
        assert aligned[0].character == "CHAR0"
        assert aligned[1].character == "CHAR1"
	def test_character_difference_same_length(self):
		"""NW should assign CHAR0 when subtitle and speech have equal length but differ in some characters."""
		s1 = "millegeeee"
		s2 = "mille asde"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)

		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert nw_alignment.subtitles[0].character == "CHAR0"
Exemple #3
0
    def test_bio_string(self):
        """Run the aligner on a short sequence pair and print the resulting grid.

        There are no assertions here; the test only fails if the alignment
        or the grid printing raises.
        """
        subtitles = [Subtitle(0, 0, "GCATGCU")]
        speech = ScriptEntity(**{
            "character": "CHAR0",
            "text": "GATTACA",
            "type": "speech",
        })

        alignment = self._sut([speech], subtitles, logger=self._logger)
        nw.pretty_print_grid(alignment)
	def test_unseparated_words_2(self):
		"""Compare DTW and NW on a subtitle that splits and misspells one script word.

		DTW with the binary distance is expected to leave the subtitle without
		a character, while NW is expected to assign CHAR0.
		"""
		s1 = "Millage Ville, Georgia."
		s2 = "Milledgeville, Georgia."

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger,
		                              distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_character_difference_repeat(self):
		"""Compare DTW and NW when the speech repeats the subtitle's final letter.

		DTW with the Levenshtein distance is expected to leave the subtitle
		without a character, while NW is expected to assign CHAR0.
		"""
		s1 = "three"
		s2 = "threeeee"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger,
		                              distance_function=dtw.levenstein_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# Fresh fixtures for the second aligner.
		# NOTE(review): presumably so the NW run does not see objects touched
		# by the DTW run - confirm whether the aligners mutate their inputs.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert nw_alignment.subtitles[0].character == "CHAR0"
Exemple #6
0
    def test_gap_penalty_alignment_adaptive(self):
        """Aligning "GAAT" against "GAAAAAAT" with an adaptive gap penalty
        should yield "GAA____T" for the shorter sequence."""
        long_seq = "GAAAAAAT"
        short_seq = "GAAT"

        # Each symbol is paired with a None second element before being
        # handed to nw.nw.
        long_index = [(symbol, None) for symbol in long_seq]
        short_index = [(symbol, None) for symbol in short_seq]

        weighting = Weighting(1, -1, AdaptiveGapPenalty(-5, -1))
        grid, traceback = nw.nw(long_index, short_index, weighting)
        alignment = nw.calculate_backtrace(grid, traceback,
                                           long_index, short_index)

        nw.pretty_print_grid(alignment)
        vertical_text = "".join(char for char, _ in alignment.vertical_index)
        horizontal_text = "".join(char for char, _ in alignment.horizontal_index)
        print(vertical_text)
        print(horizontal_text)

        assert horizontal_text == "GAA____T"
Exemple #7
0
    def test_bio_string_2(self):
        """Run the aligner with an explicit adaptive-gap weighting and print the grid.

        There are no assertions here; the test only fails if the alignment
        or the grid printing raises.
        """
        subtitles = [Subtitle(0, 0, "CNJRQCLU")]
        speech = ScriptEntity(**{
            "character": "CHAR0",
            "text": "CJRQDLN",
            "type": "speech",
        })
        weighting = Weighting(1, -1, AdaptiveGapPenalty(-5, -1))

        alignment = self._sut([speech], subtitles, weighting,
                              logger=self._logger)
        nw.pretty_print_grid(alignment)
	def test_unseperated_words_short(self):
		"""Both DTW (Levenshtein distance) and NW should assign CHAR0 when the
		subtitle writes a script word as two words."""
		s1 = "All right"
		s2 = "Alright"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger,
		                              distance_function=dtw.levenstein_distance)
		dtw.pretty_print_grid(dtw_alignment)
		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert dtw_alignment.subtitles[0].character == "CHAR0"

		seperator()
		# Fresh fixtures for the second aligner.
		# NOTE(review): presumably so the NW run does not see objects touched
		# by the DTW run - confirm whether the aligners mutate their inputs.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_unseperated_words(self):
		"""Compare DTW and NW on a subtitle with extra and split words.

		DTW with the binary distance is expected to leave the subtitle without
		a character, while NW is expected to assign CHAR0.
		"""
		s1 = "Hey, All right. All reight."
		s2 = "Alright...alright"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger,
		                              distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# Fresh fixtures for the second aligner.
		# NOTE(review): presumably so the NW run does not see objects touched
		# by the DTW run - confirm whether the aligners mutate their inputs.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_unseperated_words_long(self):
		"""Compare DTW and NW when the speech is the subtitle with all spaces removed.

		DTW with the binary distance is expected to leave the subtitle without
		a character, while NW is expected to assign CHAR0.
		"""
		s1 = "Developed by Master Wuxi in the Third Dynasty"
		s2 = "DevelopedbyMasterWuxiInTheThirdDynasty"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger,
		                              distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# Fresh fixtures for the second aligner.
		# NOTE(review): presumably so the NW run does not see objects touched
		# by the DTW run - confirm whether the aligners mutate their inputs.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Compare by value: `is` against a str literal tests object identity,
		# which only passes by accident of string interning.
		assert nw_alignment.subtitles[0].character == "CHAR0"