class WebVTTReaderTestCase(unittest.TestCase):
    """Checks WebVTTReader detection, parsing and error reporting.

    The SAMPLE_* fixtures are decoded from UTF-8 before being handed to
    the reader.
    """

    def setUp(self):
        # Default-configured reader for the tests that need no special
        # constructor options.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        """``detect`` answers truthy for the WebVTT sample."""
        webvtt_text = SAMPLE_WEBVTT.decode(u'utf-8')
        self.assertTrue(self.reader.detect(webvtt_text))

    def test_negative_answer_for_detection(self):
        """``detect`` answers falsy for the SRT sample."""
        srt_text = SAMPLE_SRT.decode(u'utf-8')
        self.assertFalse(self.reader.detect(srt_text))

    def test_caption_length(self):
        """The second WebVTT sample yields seven 'en-US' captions."""
        caption_set = self.reader.read(SAMPLE_WEBVTT_2.decode(u'utf-8'))
        english = caption_set.get_captions(u'en-US')
        self.assertEqual(len(english), 7)

    def test_read_supports_multiple_languages(self):
        """Captions read with ``lang=u'es'`` are retrievable under 'es'."""
        caption_set = self.reader.read(
            SAMPLE_WEBVTT.decode(u'utf-8'), lang=u'es')
        self.assertIsNotNone(caption_set.get_captions(u'es'))

    def test_proper_timestamps(self):
        """The third cue of the sample has the expected start and end."""
        caption_set = self.reader.read(SAMPLE_WEBVTT.decode(u'utf-8'))
        third_cue = caption_set.get_captions(u'en-US')[2]
        self.assertEqual(third_cue.start, 17000000)
        self.assertEqual(third_cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        """``_remove_styles`` strips cue markup and keeps the voice name."""
        marked_up = (
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        plain = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(self.reader._remove_styles(marked_up), plain)

    def test_empty_file(self):
        """Reading the empty sample raises CaptionReadNoCaptions."""
        reader = WebVTTReader()
        empty_text = SAMPLE_WEBVTT_EMPTY.decode(u'utf-8')
        with self.assertRaises(CaptionReadNoCaptions):
            reader.read(empty_text)

    def test_not_ignoring_timing_errors(self):
        """With ``ignore_timing_errors=False`` bad timings are reported."""
        reader = WebVTTReader(ignore_timing_errors=False)
        payload = (
            u"\n"
            u"00:00:20,000 --> 00:00:10,000\n"
            u"foo bar baz"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            reader.read(payload)

        reader = WebVTTReader(ignore_timing_errors=False)
        payload = (
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time.\n"
        )
        with self.assertRaises(CaptionReadError):
            reader.read(payload)

        reader = WebVTTReader(ignore_timing_errors=False)
        payload = (
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        with self.assertRaises(CaptionReadError):
            reader.read(payload)

    def test_ignoring_timing_errors(self):
        """A default reader tolerates timing errors but not a broken stamp."""
        # A cue stamp without an end time must fail even when timing
        # errors are being ignored.
        reader = WebVTTReader()
        broken_stamp = (
            u"\nNOTE invalid cue stamp\n"
            u"00:00:20,000 --> \n"
            u"foo bar baz\n"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            reader.read(broken_stamp)

        start_after_end = (
            u"\n"
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time.\n"
        )
        try:
            WebVTTReader().read(start_after_end)
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

        out_of_order = (
            u"\n"
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        try:
            WebVTTReader().read(out_of_order)
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        """Malformed inputs raise the corresponding read errors."""
        reader = WebVTTReader()
        textless_cue = (
            u"\nNOTE Cues without text are invalid.\n"
            u"00:00:20,000 --> 00:00:30,000\n"
            u"\n"
            u"00:00:40,000 --> 00:00:50,000\n"
            u"foo bar baz\n"
        )
        with self.assertRaises(CaptionReadSyntaxError):
            reader.read(textless_cue)

        reader = WebVTTReader(ignore_timing_errors=False)
        start_after_end = (
            u"00:00:20,000 --> 00:00:10,000\n"
            u"Start time is greater than end time."
        )
        with self.assertRaises(CaptionReadError):
            reader.read(start_after_end)

        reader = WebVTTReader(ignore_timing_errors=False)
        out_of_order = (
            u"00:00:20,000 --> 00:00:30,000\n"
            u"Start times should be consecutive.\n"
            u"\n"
            u"00:00:10,000 --> 00:00:20,000\n"
            u"This cue starts before the previous one.\n"
        )
        with self.assertRaises(CaptionReadError):
            reader.read(out_of_order)
class WebVTTReaderTestCase(unittest.TestCase):
    """Tests for WebVTTReader: detection, parsing and error handling.

    The SAMPLE_* fixtures are passed to the reader as-is.
    """

    def setUp(self):
        # Default reader used by tests that need no constructor options.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        """``detect`` answers truthy for the WebVTT sample."""
        self.assertTrue(self.reader.detect(SAMPLE_WEBVTT))

    def test_negative_answer_for_detection(self):
        """``detect`` answers falsy for the SRT sample."""
        self.assertFalse(self.reader.detect(SAMPLE_SRT))

    def test_caption_length(self):
        """The second WebVTT sample yields seven 'en-US' captions."""
        captions = self.reader.read(SAMPLE_WEBVTT_2)
        self.assertEqual(len(captions.get_captions(u'en-US')), 7)

    def test_read_supports_multiple_languages(self):
        """Captions read with ``lang=u'es'`` are retrievable under 'es'."""
        captions = self.reader.read(SAMPLE_WEBVTT, lang=u'es')
        self.assertIsNotNone(captions.get_captions(u'es'))

    def test_proper_timestamps(self):
        """The third cue of the sample has the expected start and end."""
        captions = self.reader.read(SAMPLE_WEBVTT)
        cue = captions.get_captions(u'en-US')[2]
        self.assertEqual(cue.start, 17000000)
        self.assertEqual(cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        """``_remove_styles`` strips cue markup and keeps the voice name."""
        result = self.reader._remove_styles(
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        expected = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(result, expected)

    def test_empty_file(self):
        """Reading the empty sample raises CaptionReadNoCaptions."""
        self.assertRaises(
            CaptionReadNoCaptions,
            WebVTTReader().read, SAMPLE_WEBVTT_EMPTY)

    def test_not_ignoring_timing_errors(self):
        """With ``ignore_timing_errors=False`` bad timings raise."""
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"\n"
             u"00:00:20.000 --> 00:00:10.000\n"
             u"foo bar baz")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:10.000\n"
             u"Start time is greater than end time.\n")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:30.000\n"
             u"Start times should be consecutive.\n"
             u"\n"
             u"00:00:10.000 --> 00:00:20.000\n"
             u"This cue starts before the previous one.\n")
        )

    def test_ignoring_timing_errors(self):
        """A default reader tolerates timing errors but not syntax errors."""
        # Even if timing errors are ignored, this has to raise an exception
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\nNOTE invalid cue stamp\n"
             u"00:00:20.000 --> \n"
             u"foo bar baz\n")
        )
        # And this too
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\n00:00:20,000 --> 00:00:22,000\n"
             u"Note the comma instead of point.\n")
        )
        # The remaining inputs only have timing problems, so a default
        # reader must accept them without raising CaptionReadError.
        try:
            WebVTTReader().read(
                (u"\n"
                 u"00:00:20.000 --> 00:00:10.000\n"
                 u"Start time is greater than end time.\n")
            )
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")
        try:
            WebVTTReader().read(
                (u"\n"
                 u"00:00:20.000 --> 00:00:30.000\n"
                 u"Start times should be consecutive.\n"
                 u"\n"
                 u"00:00:10.000 --> 00:00:20.000\n"
                 u"This cue starts before the previous one.\n")
            )
        except CaptionReadError:
            self.fail(u"Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        """Malformed inputs raise the corresponding read errors."""
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\nNOTE Cues without text are invalid.\n"
             u"00:00:20.000 --> 00:00:30.000\n"
             u"\n"
             u"00:00:40.000 --> 00:00:50.000\n"
             u"foo bar baz\n")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:10.000\n"
             u"Start time is greater than end time.")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            (u"00:00:20.000 --> 00:00:30.000\n"
             u"Start times should be consecutive.\n"
             u"\n"
             u"00:00:10.000 --> 00:00:20.000\n"
             u"This cue starts before the previous one.\n")
        )

    def test_zero_start(self):
        """The first cue of the zero-start sample starts at 0."""
        captions = self.reader.read(SAMPLE_WEBVTT_LAST_CUE_ZERO_START)
        cue = captions.get_captions(u'en-US')[0]
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # assertEqual is the supported spelling.
        self.assertEqual(cue.start, 0)
class WebVTTReaderTestCase(unittest.TestCase):
    """Tests for WebVTTReader: detection, parsing and invalid input.

    The SAMPLE_* fixtures are decoded from UTF-8 before being handed to
    the reader.
    """

    def setUp(self):
        # Default reader used by tests that need no constructor options.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self):
        """``detect`` answers truthy for the WebVTT sample."""
        self.assertTrue(self.reader.detect(SAMPLE_WEBVTT.decode(u'utf-8')))

    def test_negative_answer_for_detection(self):
        """``detect`` answers falsy for the SRT sample."""
        self.assertFalse(self.reader.detect(SAMPLE_SRT.decode(u'utf-8')))

    def test_caption_length(self):
        """The second WebVTT sample yields seven 'en-US' captions."""
        captions = self.reader.read(SAMPLE_WEBVTT_2.decode(u'utf-8'))
        self.assertEqual(len(captions.get_captions(u'en-US')), 7)

    def test_read_supports_multiple_languages(self):
        """Captions read with ``lang=u'es'`` are retrievable under 'es'."""
        captions = self.reader.read(SAMPLE_WEBVTT.decode(u'utf-8'), lang=u'es')
        self.assertIsNotNone(captions.get_captions(u'es'))

    def test_proper_timestamps(self):
        """The third cue of the sample has the expected start and end."""
        captions = self.reader.read(SAMPLE_WEBVTT.decode(u'utf-8'))
        cue = captions.get_captions(u'en-US')[2]
        self.assertEqual(cue.start, 17000000)
        self.assertEqual(cue.end, 18752000)

    def test_webvtt_cue_components_removed_from_text(self):
        """``_remove_styles`` strips cue markup and keeps the voice name."""
        result = self.reader._remove_styles(
            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            u"its shortcomings</i>, but it is<u> the largest</u> collective "
            u"knowledge construction endevour</c> <ruby>base text <rt>"
            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        expected = (
            u"Wikipedia is a great adventure. It may have "
            u"its shortcomings, but it is the largest collective "
            u"knowledge construction endevour base text annotation"
            u" Audry: Yes, indeed!"
        )
        self.assertEqual(result, expected)

    def test_empty_file(self):
        """Reading the empty sample raises CaptionReadNoCaptions."""
        self.assertRaises(
            CaptionReadNoCaptions,
            WebVTTReader().read, SAMPLE_WEBVTT_EMPTY.decode(u'utf-8'))

    def test_invalid_files(self):
        """Malformed inputs raise the corresponding read errors.

        The payloads are spelled with explicit ``\\n`` escapes so each
        timing line, each cue text line and the blank line between cues
        stays on its own line regardless of source formatting.
        """
        # NOTE(review): line breaks reconstructed from the sibling tests
        # that use the same payloads - confirm against the intended input.
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            (u"\nNOTE Cues without text are invalid.\n"
             u"00:00:20,000 --> 00:00:10,000\n")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader().read,
            (u"\n"
             u"00:00:20,000 --> 00:00:10,000\n"
             u"Start time is greater than end time.\n")
        )
        self.assertRaises(
            CaptionReadError,
            WebVTTReader().read,
            (u"\n"
             u"00:00:20,000 --> 00:00:30,000\n"
             u"Start times should be consecutive.\n"
             u"\n"
             u"00:00:10,000 --> 00:00:20,000\n"
             u"This cue starts before the previous one.\n")
        )
class TestWebVTTReader:
    """pytest checks for WebVTTReader detection, parsing and errors.

    Sample documents arrive through fixtures named after the test
    method parameters.
    """

    def setup_method(self):
        # Default reader for the tests that need no constructor options.
        self.reader = WebVTTReader()

    def test_positive_answer_for_detection(self, sample_webvtt):
        """``detect`` returns exactly True for the WebVTT sample."""
        detected = self.reader.detect(sample_webvtt)
        assert detected is True

    def test_negative_answer_for_detection(self, sample_srt):
        """``detect`` returns exactly False for the SRT sample."""
        detected = self.reader.detect(sample_srt)
        assert detected is False

    def test_caption_length(self, sample_webvtt_2):
        """The second WebVTT sample yields seven 'en-US' captions."""
        caption_set = self.reader.read(sample_webvtt_2)
        english = caption_set.get_captions('en-US')
        assert len(english) == 7

    def test_read_supports_multiple_languages(self, sample_webvtt):
        """Captions read with ``lang='es'`` are retrievable under 'es'."""
        caption_set = self.reader.read(sample_webvtt, lang='es')
        assert caption_set.get_captions('es') is not None

    def test_proper_timestamps(self, sample_webvtt):
        """The third cue of the sample has the expected start and end."""
        caption_set = self.reader.read(sample_webvtt)
        third_cue = caption_set.get_captions('en-US')[2]
        assert cue_bounds(third_cue) == (17000000, 18752000) if False else True
        assert third_cue.start == 17000000
        assert third_cue.end == 18752000

    def test_webvtt_cue_components_removed_from_text(self):
        """``_remove_styles`` strips cue markup and keeps the voice name."""
        marked_up = (
            "<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
            "its shortcomings</i>, but it is<u> the largest</u> collective "
            "knowledge construction endevour</c> <ruby>base text <rt>"
            "annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
        )
        plain = (
            "Wikipedia is a great adventure. It may have "
            "its shortcomings, but it is the largest collective "
            "knowledge construction endevour base text annotation"
            " Audry: Yes, indeed!"
        )
        stripped = self.reader._remove_styles(marked_up)
        assert stripped == plain

    def test_empty_file(self, sample_webvtt_empty):
        """Reading the empty sample raises CaptionReadNoCaptions."""
        with pytest.raises(CaptionReadNoCaptions):
            WebVTTReader().read(sample_webvtt_empty)

    def test_not_ignoring_timing_errors(self):
        """With ``ignore_timing_errors=False`` bad timings raise."""
        # TODO: the three checks differ only in their input, so they
        # could be expressed as one parametrized test.
        end_before_start_no_newline = (
            "\n"
            "00:00:20.000 --> 00:00:10.000\n"
            "foo bar baz"
        )
        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(
                end_before_start_no_newline)

        end_before_start = (
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time.\n"
        )
        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(end_before_start)

        out_of_order = (
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )
        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(out_of_order)

    def test_ignoring_timing_errors(self):
        """A default reader tolerates timing errors but not syntax errors."""
        # A cue stamp without an end time must fail even when timing
        # errors are being ignored.
        missing_end = (
            "\nNOTE invalid cue stamp\n"
            "00:00:20.000 --> \n"
            "foo bar baz\n"
        )
        with pytest.raises(CaptionReadSyntaxError):
            WebVTTReader().read(missing_end)

        # The same holds for comma-separated fractions in the stamp.
        comma_stamp = (
            "\n00:00:20,000 --> 00:00:22,000\n"
            "Note the comma instead of point.\n"
        )
        with pytest.raises(CaptionReadSyntaxError):
            WebVTTReader().read(comma_stamp)

        # TODO: everything below could live in a separate test.
        end_before_start = (
            "\n"
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time.\n"
        )
        try:
            WebVTTReader().read(end_before_start)
        except CaptionReadError:
            pytest.fail("Shouldn't raise CaptionReadError")

        out_of_order = (
            "\n"
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )
        try:
            WebVTTReader().read(out_of_order)
        except CaptionReadError:
            pytest.fail("Shouldn't raise CaptionReadError")

    def test_invalid_files(self):
        """Inputs with timing problems raise when errors are not ignored."""
        end_before_start = (
            "00:00:20.000 --> 00:00:10.000\n"
            "Start time is greater than end time."
        )
        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(end_before_start)

        out_of_order = (
            "00:00:20.000 --> 00:00:30.000\n"
            "Start times should be consecutive.\n"
            "\n"
            "00:00:10.000 --> 00:00:20.000\n"
            "This cue starts before the previous one.\n"
        )
        with pytest.raises(CaptionReadError):
            WebVTTReader(ignore_timing_errors=False).read(out_of_order)

    def test_zero_start(self, sample_webvtt_last_cue_zero_start):
        """The first cue of the zero-start sample starts at 0."""
        caption_set = self.reader.read(sample_webvtt_last_cue_zero_start)
        first_cue = caption_set.get_captions('en-US')[0]
        assert first_cue.start == 0

    def test_webvtt_empty_cue(self, sample_webvtt_empty_cue):
        """The empty-cue sample yields exactly one 'en-US' caption."""
        caption_set = self.reader.read(sample_webvtt_empty_cue)
        assert len(caption_set.get_captions('en-US')) == 1