Ejemplo n.º 1
0
class WebVTTReaderTestCase(unittest.TestCase):
    """Tests for WebVTTReader: detection, reading, markup removal, errors."""

    def setUp(self):
        # A fresh reader built with default options for every test.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        # detect() must be truthy for the decoded WebVTT sample.
        webvtt_text = SAMPLE_WEBVTT.decode(u'utf-8')
        self.assertTrue(self.reader.detect(webvtt_text))

    def test_negative_answer_for_detection(self):
        # detect() must be falsy for the decoded SRT sample.
        srt_text = SAMPLE_SRT.decode(u'utf-8')
        self.assertFalse(self.reader.detect(srt_text))

    def test_caption_length(self):
        # The second WebVTT sample yields seven en-US captions.
        caption_set = self.reader.read(SAMPLE_WEBVTT_2.decode(u'utf-8'))
        en_cues = caption_set.get_captions(u'en-US')
        self.assertEqual(len(en_cues), 7)

    def test_read_supports_multiple_languages(self):
        # Captions read with lang=u'es' are retrievable under u'es'.
        webvtt_text = SAMPLE_WEBVTT.decode(u'utf-8')
        caption_set = self.reader.read(webvtt_text, lang=u'es')
        self.assertIsNotNone(caption_set.get_captions(u'es'))

    def test_proper_timestamps(self):
        # The third en-US cue of the sample has the expected start and end.
        caption_set = self.reader.read(SAMPLE_WEBVTT.decode(u'utf-8'))
        third_cue = caption_set.get_captions(u'en-US')[2]
        self.assertEqual(third_cue.start, 17000000)
        self.assertEqual(third_cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        # _remove_styles() strips the cue markup; the <v Audry> voice tag is
        # expected to come out as an "Audry: " prefix.
        markup = (
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        plain_text = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(self.reader._remove_styles(markup), plain_text)

    def test_empty_file(self):
        # Reading the empty sample must raise CaptionReadNoCaptions.
        read = WebVTTReader().read
        empty_text = SAMPLE_WEBVTT_EMPTY.decode(u'utf-8')
        with self.assertRaises(CaptionReadNoCaptions):
            read(empty_text)

    def test_not_ignoring_timing_errors(self):
        # With ignore_timing_errors=False every document below must be
        # rejected with the listed exception type.

        # Leading blank line, start time after end time.
        read = WebVTTReader(ignore_timing_errors=False).read
        blank_then_reversed = (
            u"\n"
            u"00:00:20,000 --> 00:00:10,000\n"
            u"foo bar baz"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            read(blank_then_reversed)

        # Start time after end time.
        read = WebVTTReader(ignore_timing_errors=False).read
        reversed_cue = (
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time.\n"
        )
        with self.assertRaises(CaptionReadError):
            read(reversed_cue)

        # Second cue starts before the first one.
        read = WebVTTReader(ignore_timing_errors=False).read
        out_of_order_cues = (
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        with self.assertRaises(CaptionReadError):
            read(out_of_order_cues)

    def test_ignoring_timing_errors(self):
        # A timing line without an end time is a syntax error even for a
        # reader built with default options.
        read = WebVTTReader().read
        missing_end_time = (
            u"\nNOTE invalid cue stamp\n"
            u"00:00:20,000 --> \n"
            u"foo bar baz\n"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            read(missing_end_time)

        # A default reader must not raise CaptionReadError when the start
        # time is after the end time ...
        reversed_cue = (
            u"\n"
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time.\n"
        )
        try:
            WebVTTReader().read(reversed_cue)
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

        # ... nor when a cue starts before the previous one.
        out_of_order_cues = (
            u"\n"
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        try:
            WebVTTReader().read(out_of_order_cues)
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        # A timing line followed by no text is a syntax error for a default
        # reader.
        read = WebVTTReader().read
        cue_without_text = (
            u"\nNOTE Cues without text are invalid.\n"
            u"00:00:20,000 --> 00:00:30,000\n"
            u"\n"
            u"00:00:40,000 --> 00:00:50,000\n"
            u"foo bar baz\n"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            read(cue_without_text)

        # Start time after end time, timing errors not ignored.
        read = WebVTTReader(ignore_timing_errors=False).read
        reversed_cue = (
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time."
        )
        with self.assertRaises(CaptionReadError):
            read(reversed_cue)

        # Cue starting before the previous one, timing errors not ignored.
        read = WebVTTReader(ignore_timing_errors=False).read
        out_of_order_cues = (
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        with self.assertRaises(CaptionReadError):
            read(out_of_order_cues)
Ejemplo n.º 2
0
class WebVTTReaderTestCase(unittest.TestCase):
    """Tests for WebVTTReader: detection, reading, markup removal, errors."""

    def setUp(self):
        # A fresh reader built with default options for every test.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        # detect() must be truthy for the WebVTT sample.
        self.assertTrue(self.reader.detect(SAMPLE_WEBVTT))

    def test_negative_answer_for_detection(self):
        # detect() must be falsy for the SRT sample.
        self.assertFalse(self.reader.detect(SAMPLE_SRT))

    def test_caption_length(self):
        # The second WebVTT sample yields seven en-US captions.
        captions = self.reader.read(SAMPLE_WEBVTT_2)
        self.assertEqual(len(captions.get_captions(u'en-US')), 7)

    def test_read_supports_multiple_languages(self):
        # Captions read with lang=u'es' are retrievable under u'es'.
        captions = self.reader.read(SAMPLE_WEBVTT, lang=u'es')
        self.assertIsNotNone(captions.get_captions(u'es'))

    def test_proper_timestamps(self):
        # The third en-US cue of the sample has the expected start and end.
        captions = self.reader.read(SAMPLE_WEBVTT)
        cue = captions.get_captions(u'en-US')[2]
        self.assertEqual(cue.start, 17000000)
        self.assertEqual(cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        # _remove_styles() strips the cue markup; the <v Audry> voice tag is
        # expected to come out as an "Audry: " prefix.
        result = self.reader._remove_styles(
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        expected = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(result, expected)

    def test_empty_file(self):
        # Reading the empty sample must raise CaptionReadNoCaptions.
        self.assertRaises(
            CaptionReadNoCaptions,
            WebVTTReader().read, SAMPLE_WEBVTT_EMPTY)

    def test_not_ignoring_timing_errors(self):
        # With ignore_timing_errors=False every document below must be
        # rejected with CaptionReadError.

        # Leading blank line, start time after end time.
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"\n"
             u"00:00:20.000 --> 00:00:10.000\n"
             u"foo bar baz")
        )

        # Start time after end time.
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:10.000\n"
             u"Start time is greater than end time.\n")
        )

        # Second cue starts before the first one.
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:30.000\n"
             u"Start times should be consecutive.\n"
             u"\n"
             u"00:00:10.000 --> 00:00:20.000\n"
             u"This cue starts before the previous one.\n")
        )

    def test_ignoring_timing_errors(self):
        # Even if timing errors are ignored, this has to raise an exception:
        # the timing line has no end time.
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\nNOTE invalid cue stamp\n"
             u"00:00:20.000 --> \n"
             u"foo bar baz\n")
        )

        # And this too: a comma is used as the fraction separator.
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\n00:00:20,000 --> 00:00:22,000\n"
             u"Note the comma instead of point.\n")
        )

        # A default reader must not raise CaptionReadError when the start
        # time is after the end time ...
        try:
            WebVTTReader().read(
                (u"\n"
                 u"00:00:20.000 --> 00:00:10.000\n"
                 u"Start time is greater than end time.\n")
            )
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

        # ... nor when a cue starts before the previous one.
        try:
            WebVTTReader().read(
                (u"\n"
                 u"00:00:20.000 --> 00:00:30.000\n"
                 u"Start times should be consecutive.\n"
                 u"\n"
                 u"00:00:10.000 --> 00:00:20.000\n"
                 u"This cue starts before the previous one.\n")
            )
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        # A timing line followed by no text is a syntax error for a default
        # reader.
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\nNOTE Cues without text are invalid.\n"
             u"00:00:20.000 --> 00:00:30.000\n"
             u"\n"
             u"00:00:40.000 --> 00:00:50.000\n"
             u"foo bar baz\n")
        )

        # Start time after end time, timing errors not ignored.
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:10.000\n"
             u"Start time is greater than end time.")
        )

        # Cue starting before the previous one, timing errors not ignored.
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:30.000\n"
             u"Start times should be consecutive.\n"
             u"\n"
             u"00:00:10.000 --> 00:00:20.000\n"
             u"This cue starts before the previous one.\n")
        )

    def test_zero_start(self):
        # The first en-US cue of this sample must start at 0.
        captions = self.reader.read(SAMPLE_WEBVTT_LAST_CUE_ZERO_START)
        cue = captions.get_captions(u'en-US')[0]
        # assertEqual, not the deprecated assertEquals alias, which no longer
        # exists on TestCase in Python 3.12+.
        self.assertEqual(cue.start, 0)
Ejemplo n.º 3
0
class WebVTTReaderTestCase(unittest.TestCase):
    """Tests for WebVTTReader: detection, reading, markup removal, errors."""

    def setUp(self):
        # A fresh reader built with default options for every test.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        # detect() must be truthy for the decoded WebVTT sample.
        webvtt_text = SAMPLE_WEBVTT.decode(u'utf-8')
        self.assertTrue(self.reader.detect(webvtt_text))

    def test_negative_answer_for_detection(self):
        # detect() must be falsy for the decoded SRT sample.
        srt_text = SAMPLE_SRT.decode(u'utf-8')
        self.assertFalse(self.reader.detect(srt_text))

    def test_caption_length(self):
        # The second WebVTT sample yields seven en-US captions.
        caption_set = self.reader.read(SAMPLE_WEBVTT_2.decode(u'utf-8'))
        en_cues = caption_set.get_captions(u'en-US')
        self.assertEqual(len(en_cues), 7)

    def test_read_supports_multiple_languages(self):
        # Captions read with lang=u'es' are retrievable under u'es'.
        webvtt_text = SAMPLE_WEBVTT.decode(u'utf-8')
        caption_set = self.reader.read(webvtt_text, lang=u'es')
        self.assertIsNotNone(caption_set.get_captions(u'es'))

    def test_proper_timestamps(self):
        # The third en-US cue of the sample has the expected start and end.
        caption_set = self.reader.read(SAMPLE_WEBVTT.decode(u'utf-8'))
        third_cue = caption_set.get_captions(u'en-US')[2]
        self.assertEqual(third_cue.start, 17000000)
        self.assertEqual(third_cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        # _remove_styles() strips the cue markup; the <v Audry> voice tag is
        # expected to come out as an "Audry: " prefix.
        markup = (
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        plain_text = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(self.reader._remove_styles(markup), plain_text)

    def test_empty_file(self):
        # Reading the empty sample must raise CaptionReadNoCaptions.
        read = WebVTTReader().read
        empty_text = SAMPLE_WEBVTT_EMPTY.decode(u'utf-8')
        with self.assertRaises(CaptionReadNoCaptions):
            read(empty_text)

    def test_invalid_files(self):
        # The documents are triple-quoted on purpose: their leading
        # indentation and trailing whitespace are part of the input and are
        # kept exactly as written.

        # A timing line with no cue text after it.
        read = WebVTTReader().read
        cue_without_text = u"""
            NOTE Cues without text are invalid.

            00:00:20,000 --> 00:00:10,000
            """
        with self.assertRaises(CaptionReadSyntaxError):
            read(cue_without_text)

        # Start time after end time.
        read = WebVTTReader().read
        reversed_cue = u"""
            00:00:20,000 --> 00:00:10,000
            Start time is greater than end time.
            """
        with self.assertRaises(CaptionReadError):
            read(reversed_cue)

        # Second cue starts before the first one.
        read = WebVTTReader().read
        out_of_order_cues = u"""
            00:00:20,000 --> 00:00:30,000
            Start times should be consecutive.

            00:00:10,000 --> 00:00:20,000
            This cue starts before the previous one.
            """
        with self.assertRaises(CaptionReadError):
            read(out_of_order_cues)
Ejemplo n.º 4
0
class TestWebVTTReader:
    """Tests for WebVTTReader: detection, reading, markup removal, errors."""

    def setup_method(self):
        """Give every test its own reader built with default options."""
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self, sample_webvtt):
        """detect() returns exactly True for the WebVTT sample."""
        detected = self.reader.detect(sample_webvtt)
        assert detected is True

    def test_negative_answer_for_detection(self, sample_srt):
        """detect() returns exactly False for the SRT sample."""
        detected = self.reader.detect(sample_srt)
        assert detected is False

    def test_caption_length(self, sample_webvtt_2):
        """The second WebVTT sample yields seven en-US captions."""
        caption_set = self.reader.read(sample_webvtt_2)
        en_cues = caption_set.get_captions('en-US')

        assert len(en_cues) == 7

    def test_read_supports_multiple_languages(self, sample_webvtt):
        """Captions read with lang='es' are retrievable under 'es'."""
        caption_set = self.reader.read(sample_webvtt, lang='es')
        es_cues = caption_set.get_captions('es')

        assert es_cues is not None

    def test_proper_timestamps(self, sample_webvtt):
        """The third en-US cue of the sample has the expected start/end."""
        caption_set = self.reader.read(sample_webvtt)
        third_cue = caption_set.get_captions('en-US')[2]

        assert third_cue.start == 17000000
        assert third_cue.end == 18752000

    def test_webvtt_cue_components_removed_from_text(self):
        """_remove_styles() strips cue markup from the text.

        The <v Audry> voice tag is expected to come out as an "Audry: "
        prefix.
        """
        markup = (
            "<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            "its shortcomings</i>, but it is<u> the largest</u> collective "
            "knowledge construction endevour</c> <ruby>base text <rt>"
            "annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        plain_text = (
            "Wikipedia is a great adventure. It may have "
            "its shortcomings, but it is the largest collective "
            "knowledge construction endevour base text annotation"
            " Audry: Yes, indeed!"
        )

        assert self.reader._remove_styles(markup) == plain_text

    def test_empty_file(self, sample_webvtt_empty):
        """Reading the empty sample raises CaptionReadNoCaptions."""
        with pytest.raises(CaptionReadNoCaptions):
            fresh_reader = WebVTTReader()
            fresh_reader.read(sample_webvtt_empty)

    def test_not_ignoring_timing_errors(self):
        """With ignore_timing_errors=False each document is rejected."""
        # TODO: the three cases share one assertion and could be
        # parametrized.
        blank_then_reversed = (
            "\n"
            "00:00:20.000 --> 00:00:10.000\n"
            "foo bar baz"
        )
        reversed_cue = (
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time.\n"
        )
        out_of_order_cues = (
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )

        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(blank_then_reversed)

        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(reversed_cue)

        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(out_of_order_cues)

    def test_ignoring_timing_errors(self):
        """A default reader rejects syntax errors but not timing errors."""
        # Even if timing errors are ignored, a timing line without an end
        # time has to raise an exception.
        missing_end_time = (
            "\nNOTE invalid cue stamp\n00:00:20.000 --> \nfoo bar baz\n"
        )
        with pytest.raises(CaptionReadSyntaxError):
            WebVTTReader().read(missing_end_time)

        # And so does a comma used as the fraction separator.
        comma_separator = (
            "\n00:00:20,000 --> 00:00:22,000\n"
            "Note the comma instead of point.\n"
        )
        with pytest.raises(CaptionReadSyntaxError):
            WebVTTReader().read(comma_separator)

        # TODO: from here on this could be split into two separate tests.
        reversed_cue = (
            "\n"
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time.\n"
        )
        try:
            WebVTTReader().read(reversed_cue)
        except CaptionReadError:
            pytest.fail("Shouldn't raise CaptionReadError")

        out_of_order_cues = (
            "\n"
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )
        try:
            WebVTTReader().read(out_of_order_cues)
        except CaptionReadError:
            pytest.fail("Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        """Timing errors raise CaptionReadError when they are not ignored."""
        reversed_cue = (
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time."
        )
        out_of_order_cues = (
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )

        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(reversed_cue)

        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(out_of_order_cues)

    def test_zero_start(self, sample_webvtt_last_cue_zero_start):
        """The first en-US cue of this sample starts at 0."""
        caption_set = self.reader.read(sample_webvtt_last_cue_zero_start)
        first_cue = caption_set.get_captions('en-US')[0]

        assert first_cue.start == 0

    def test_webvtt_empty_cue(self, sample_webvtt_empty_cue):
        """The empty-cue sample yields exactly one en-US caption."""
        caption_set = self.reader.read(sample_webvtt_empty_cue)
        en_cues = caption_set.get_captions('en-US')

        assert len(en_cues) == 1