Beispiel #1
0
    def test_1(self):
        """Run the system under test with binary distance on three subtitles.

        Smoke test only: the resulting alignment grid is printed and nothing
        is asserted.
        """
        subtitle_texts = (
            "hello my name is micha",
            "bla my bla is micha",
            "he is up",
        )
        subtitles = [Subtitle(0, 0, text) for text in subtitle_texts]

        first_speech = {
            "character": "CHAR0",
            "text": "hello my name is michi",
            "type": "speech",
        }
        second_speech = {
            "character": "CHAR1",
            "text": "who he is",
            "type": "speech",
        }
        script_entities = [
            ScriptEntity(**first_speech),
            ScriptEntity(**second_speech),
        ]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.binary_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
	def test_doubling_in_subtitles(self):
		"""Check attribution when the same subtitle text appears twice.

		The first subtitle is expected to be assigned to "CHAR0" and the
		repeated one to stay unassigned, for the DTW run and for both NW
		weightings used here.
		"""
		text_1 = "Broke in right on the two of them"
		text_2 = "No matter what they say, it's all about money"

		subtitles = mock_subtitles([text_1, text_1])
		speeches = mock_speeches([("CHAR0", text_1), ("CHAR1", text_2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger, distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert dtw_alignment.subtitles[0].character == "CHAR0"
		assert dtw_alignment.subtitles[1].character is None

		seperator()

		nw_alignment = self._sut_nw(speeches, subtitles, Weighting(1, -1, GapPenalty(-2)), logger=self._logger)
		assert nw_alignment.subtitles[0].character == "CHAR0"
		assert nw_alignment.subtitles[1].character is None

		seperator()

		# different weighting won't change the results on doubled subtitles
		nw_alignment = self._sut_nw(
			speeches,
			subtitles,
			Weighting(1, -1, AdaptiveGapPenalty(-5, -1)),
			logger=self._logger,
		)
		assert nw_alignment.subtitles[0].character == "CHAR0"
		assert nw_alignment.subtitles[1].character is None
	def test_different_order(self):
		"""Check attribution when subtitles and speeches come in opposite order.

		Expected results as asserted below: DTW and NW with the adaptive gap
		penalty assign only the second subtitle (to "CHAR0"); NW with the
		plain gap penalty assigns neither.
		"""
		s1 = "Broke in right on the two of them"
		s2 = "No matter what they say, it's all about money"

		subtitles = mock_subtitles([s1, s2])
		speeches = mock_speeches([("CHAR0", s2), ("CHAR1", s1)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger, distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert dtw_alignment.subtitles[1].character == "CHAR0"

		seperator()

		nw_alignment = self._sut_nw(speeches, subtitles, Weighting(1, -1, GapPenalty(-2)), logger=self._logger)
		assert nw_alignment.subtitles[0].character is None
		assert nw_alignment.subtitles[1].character is None

		seperator()

		# using the adaptive gap penalty, the first subtitle will be fully matched with gaps
		# and the second can match against the first script
		nw_alignment = self._sut_nw(
			speeches,
			subtitles,
			Weighting(1, -1, AdaptiveGapPenalty(-5, -1)),
			logger=self._logger,
		)
		assert nw_alignment.subtitles[0].character is None
		assert nw_alignment.subtitles[1].character == "CHAR0"
	def test_matching_over_scripts(self):
		"""Check DTW attribution for a doubled subtitle against two speeches.

		The first subtitle is expected to be assigned to "CHAR0"; the
		repeated one is expected to stay unassigned.
		"""
		text_1 = "Broke in right on the two of them"
		text_2 = "No matter what they say, it's all about money"

		subtitles = mock_subtitles([text_1, text_1])
		speeches = mock_speeches([("CHAR0", text_1), ("CHAR1", text_2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger, distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert dtw_alignment.subtitles[0].character == "CHAR0"
		assert dtw_alignment.subtitles[1].character is None
	def test_different_order_2(self):
		"""Check DTW attribution for two short lines given in swapped order.

		The first subtitle is expected to be assigned to "CHAR1" and the
		second to stay unassigned.
		"""
		s1 = "No matter what they say"
		s2 = "It is all about money"

		subtitles = mock_subtitles([s1, s2])
		speeches = mock_speeches([("CHAR0", s2), ("CHAR1", s1)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger, distance_function=dtw.binary_distance)
		dtw.pretty_print_grid(dtw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert dtw_alignment.subtitles[0].character == "CHAR1"
		assert dtw_alignment.subtitles[1].character is None
	def test_unseparated_words_2(self):
		"""Compare DTW and NW on a name spelled as two words vs. one word.

		DTW with binary distance is expected to leave the subtitle
		unassigned; NW with its default arguments is expected to assign it
		to "CHAR0".
		"""
		s1 = "Millage Ville, Georgia."
		s2 = "Milledgeville, Georgia."

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(
			speeches,
			subtitles,
			self._logger,
			distance_function=dtw.binary_distance,
		)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert nw_alignment.subtitles[0].character == "CHAR0"
    def test_2(self):
        """Align one short subtitle against two speeches using Levenshtein distance.

        The subtitle is expected to be attributed to "CHAR0".
        """
        subtitles = [Subtitle(0, 0, "It's cute?")]

        long_speech = ScriptEntity("CHAR0", "I sort of like it. I mean, it's cute.")
        short_speech = ScriptEntity("CHAR1", "Cute?")
        script_entities = [long_speech, short_speech]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw.levenstein_distance,
            verbose=True,
        )
        dtw.pretty_print_grid(alignment)

        matched_character = alignment.subtitles[0].character
        assert matched_character == "CHAR0"
	def test_character_difference_repeat(self):
		"""Compare DTW and NW on a word whose last letter is repeated.

		DTW with Levenshtein distance is expected to leave the subtitle
		unassigned; NW with its default arguments is expected to assign it
		to "CHAR0".
		"""
		s1 = "three"
		s2 = "threeeee"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(speeches, subtitles, self._logger, distance_function=dtw.levenstein_distance)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# The NW run gets freshly built inputs rather than the DTW run's objects.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_unseperated_words_short(self):
		"""Compare DTW and NW on "All right" vs. "Alright".

		Both DTW with Levenshtein distance and NW with its default arguments
		are expected to assign the subtitle to "CHAR0".
		"""
		s1 = "All right"
		s2 = "Alright"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(
			speeches,
			subtitles,
			self._logger,
			distance_function=dtw.levenstein_distance,
		)
		dtw.pretty_print_grid(dtw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert dtw_alignment.subtitles[0].character == "CHAR0"

		seperator()
		# The NW run gets freshly built inputs rather than the DTW run's objects.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_unseperated_words(self):
		"""Compare DTW and NW on a noisy "All right" subtitle vs. "Alright...alright".

		DTW with binary distance is expected to leave the subtitle
		unassigned; NW with its default arguments is expected to assign it
		to "CHAR0".
		"""
		s1 = "Hey, All right. All reight."
		s2 = "Alright...alright"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(
			speeches,
			subtitles,
			self._logger,
			distance_function=dtw.binary_distance,
		)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# The NW run gets freshly built inputs rather than the DTW run's objects.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert nw_alignment.subtitles[0].character == "CHAR0"
	def test_unseperated_words_long(self):
		"""Compare DTW and NW on a sentence vs. the same words run together.

		DTW with binary distance is expected to leave the subtitle
		unassigned; NW with its default arguments is expected to assign it
		to "CHAR0".
		"""
		s1 = "Developed by Master Wuxi in the Third Dynasty"
		s2 = "DevelopedbyMasterWuxiInTheThirdDynasty"

		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		dtw_alignment = self._sut_dtw(
			speeches,
			subtitles,
			self._logger,
			distance_function=dtw.binary_distance,
		)
		dtw.pretty_print_grid(dtw_alignment)
		assert dtw_alignment.subtitles[0].character is None

		seperator()
		# The NW run gets freshly built inputs rather than the DTW run's objects.
		subtitles = mock_subtitles([s1])
		speeches = mock_speeches([("CHAR0", s2)])

		nw_alignment = self._sut_nw(speeches, subtitles, logger=self._logger)
		nw.pretty_print_grid(nw_alignment)
		# Strings are compared by value: `is` against a literal only passes
		# when the interpreter happens to reuse the same string object.
		assert nw_alignment.subtitles[0].character == "CHAR0"
Beispiel #12
0
    def test_3(self):
        """Run the system under test with binary distance on one subtitle and one speech.

        Smoke test only: the resulting alignment grid is printed and nothing
        is asserted.
        """
        subtitles = [Subtitle(0, 0, "hello what are we doing")]

        speech_fields = {
            "character": "CHAR0",
            "text": "hello what is it we do",
            "type": "speech",
        }
        script_entities = [ScriptEntity(**speech_fields)]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.binary_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
Beispiel #13
0
    def test_2(self):
        """Run the system under test with Levenshtein distance on reordered words.

        Smoke test only: the resulting alignment grid is printed and nothing
        is asserted.
        """
        subtitles = [Subtitle(0, 0, "hello my name is micha")]

        speech_fields = {
            "character": "CHAR0",
            "text": "hello my name micha is",
            "type": "speech",
        }
        script_entities = [ScriptEntity(**speech_fields)]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.levenstein_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
    def test_1(self):
        """Align one subtitle against two speeches using Levenshtein distance.

        The subtitle is expected to be attributed to "CHAR1".
        """
        subtitles = [Subtitle(0, 0, "You're playing music?")]

        unrelated_speech = ScriptEntity(
            "CHAR0",
            "Yeah, they really want you... they really want you... they really do.",
        )
        similar_speech = ScriptEntity("CHAR1", "You guys are playing music?")
        script_entities = [unrelated_speech, similar_speech]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw.levenstein_distance,
            verbose=True,
        )
        dtw.pretty_print_grid(alignment)

        matched_character = alignment.subtitles[0].character
        assert matched_character == "CHAR1"