Ejemplo n.º 1
0
class SRTCaptionsTestCase(unittest.TestCase):
    """Round-trip check for converting captions between SRT and WebVTT."""

    def setUp(self):
        """Create fresh converter objects and the scratch output directory."""
        self.webvtt = WebVTT()
        self.srtcaptions = SRTCaptions()

        os.makedirs(OUTPUT_DIR)

    def _get_file(self, filename):
        """Return the path of ``filename`` inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def tearDown(self):
        """Delete the scratch output directory when it is still present."""
        if not os.path.exists(OUTPUT_DIR):
            return
        rmtree(OUTPUT_DIR)

    def test_convert_from_srt_to_vtt_and_back_gives_same_file(self):
        """SRT -> VTT -> SRT must give back the same text, modulo outer whitespace."""
        source_srt = os.path.join(OUTPUT_DIR, 'sample.srt')
        intermediate_vtt = os.path.join(OUTPUT_DIR, 'sample.vtt')
        roundtrip_srt = os.path.join(OUTPUT_DIR, 'sample_converted.srt')

        copy(self._get_file('sample.srt'), OUTPUT_DIR)

        # save() gets no target here; the next step reads sample.vtt
        # from OUTPUT_DIR.
        self.webvtt.from_srt(source_srt)
        self.webvtt.save()

        self.srtcaptions.from_vtt(intermediate_vtt)
        self.srtcaptions.save(roundtrip_srt)

        contents = []
        for path in (source_srt, roundtrip_srt):
            with open(path, 'r', encoding='utf-8') as handle:
                contents.append(handle.read())

        original, converted = contents
        self.assertEqual(original.strip(), converted.strip())
Ejemplo n.º 2
0
 def test_srt_empty_gets_removed(self):
     """With ``ignore_empty_captions`` on, every parsed caption has text.

     Each caption must carry at least one line, and no line may be the
     empty string or ``None``.
     """
     parser = WebVTT(parse_options={'ignore_empty_captions': True})
     fixture = self._get_file('empty_caption_text.srt')
     parsed = parser.from_srt(fixture)
     for entry in parsed.captions:
         self.assertNotEqual(len(entry.lines), 0)
         for text in entry.lines:
             self.assertNotEqual(text, "")
             self.assertIsNotNone(text)
Ejemplo n.º 3
0
def read_file(file_name):
    """Read an SRT file and return the segments built from its captions.

    Every caption loaded through ``WebVTT.from_srt`` is handed to
    ``read_caption``; captions for which it returns ``None`` are left out
    of the returned list.
    """
    parsed = WebVTT.from_srt(file_name)
    # Lazy generator: read_caption still runs once per caption, in order.
    converted = (read_caption(caption) for caption in parsed.captions)
    return [segment for segment in converted if segment is not None]
Ejemplo n.º 4
0
class WebVTTTestCase(unittest.TestCase):
    """Tests for WebVTT reading, saving and conversion, and for Caption."""

    def setUp(self):
        """Give every test its own empty WebVTT instance."""
        self.webvtt = WebVTT()

    def _get_file(self, filename):
        """Return the path of ``filename`` inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it, return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _read_output_lines(self, filename):
        """Return the right-stripped lines of a UTF-8 file in OUTPUT_DIR."""
        path = os.path.join(OUTPUT_DIR, filename)
        with open(path, 'r', encoding='utf-8') as handle:
            return [row.rstrip() for row in handle]

    def _make_target_folder(self):
        """Create and return the ``test_folder`` directory under OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    def tearDown(self):
        """Delete OUTPUT_DIR when a test has created it."""
        if not os.path.exists(OUTPUT_DIR):
            return
        rmtree(OUTPUT_DIR)

    def test_create_caption(self):
        """A new Caption exposes its timestamps, their seconds and its lines."""
        expected_text = ['Caption test line 1', 'Caption test line 2']
        # Pass a copy so the final comparison is against an independent list.
        caption = Caption('00:00:00.500', '00:00:07.000', list(expected_text))

        self.assertEqual(caption.start, '00:00:00.500')
        self.assertEqual(caption.start_in_seconds, 0.5)
        self.assertEqual(caption.end, '00:00:07.000')
        self.assertEqual(caption.end_in_seconds, 7)
        self.assertEqual(caption.lines, expected_text)

    def test_save_captions(self):
        """A caption appended after reading is written back by save()."""
        staged = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        lines = self._read_output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_srt_conversion(self):
        """Loading an SRT file and saving creates the matching .vtt file."""
        staged = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._read_output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT', '',
            '00:00:00.500 --> 00:00:07.000', 'Caption text #1',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_sbv_conversion(self):
        """Loading an SBV file and saving creates the matching .vtt file."""
        staged = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._read_output_lines('two_captions.vtt')
        expected_lines = [
            'WEBVTT', '',
            '00:00:00.378 --> 00:00:11.378', 'Caption text #1', '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)', 'Caption text #2 (line 2)',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_save_to_other_location(self):
        """Saving to a directory writes the file there under its own name."""
        folder = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(folder)

        saved = os.path.join(folder, 'one_caption.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_save_specific_filename(self):
        """Saving to an explicit .vtt path creates exactly that file."""
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        self.assertTrue(os.path.exists(destination))

    def test_save_specific_filename_no_extension(self):
        """Saving to a name without extension is expected to add ``.vtt``."""
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        saved = os.path.join(folder, 'custom_name.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_caption_timestamp_update(self):
        """Assigning new start/end timestamps is reflected when read back."""
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_text(self):
        """``text`` joins the caption lines with newlines."""
        cue = Caption(text=['Caption line #1', 'Caption line #2'])

        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        """A multi-line string passed as ``text`` is split into lines."""
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_supported_formats(self):
        """supported_formats() lists the first item of each SUPPORTED_FORMATS entry."""
        reported = WebVTT().supported_formats()
        expected = [entry[0] for entry in SUPPORTED_FORMATS]

        self.assertListEqual(reported, expected)

    def test_update_text(self):
        """Assigning ``text`` replaces the caption text."""
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'

        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        """Assigning a multi-line string to ``text`` yields two lines."""
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)

        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        """Assigning a non-string to ``text`` raises AttributeError."""
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        """Items of ``lines`` can be replaced in place."""
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'

        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        """``captions`` is a list after reading a file."""
        self.webvtt.read(self._get_file('sample.vtt'))

        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        """Rebinding ``captions`` raises AttributeError."""
        self.webvtt.read(self._get_file('sample.vtt'))

        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        """The WebVTT object supports indexing and ``len`` over its captions."""
        self.webvtt.read(self._get_file('sample.vtt'))

        first = self.webvtt[0]
        self.assertIsInstance(first, Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        """save() on a WebVTT with no file raises MissingFilenameError."""
        unsaved = WebVTT()

        with self.assertRaises(MissingFilenameError):
            unsaved.save()

    def test_malformed_start_timestamp(self):
        """A start timestamp of ``'01:00'`` raises MalformedCaptionError."""
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')
Ejemplo n.º 5
0
 def test_srt_empty_gets_removed(self):
     """``missing_caption_text.srt`` parsed with default options gives 4 captions."""
     parser = WebVTT()
     fixture = self._get_file('missing_caption_text.srt')
     parsed = parser.from_srt(fixture)
     self.assertEqual(len(parsed.captions), 4)
Ejemplo n.º 6
0
 def test_srt_empty_caption_text(self):
     """``missing_caption_text.srt`` still yields a non-empty caption list."""
     parser = WebVTT()
     fixture = self._get_file('missing_caption_text.srt')
     found = parser.from_srt(fixture).captions
     self.assertTrue(found)
Ejemplo n.º 7
0
 def test_srt_empty_caption_text(self):
     """With ``ignore_empty_captions`` on, the fixture still yields captions."""
     parser = WebVTT(parse_options={'ignore_empty_captions': True})
     fixture = self._get_file('empty_caption_text.srt')
     found = parser.from_srt(fixture).captions
     self.assertTrue(found)
Ejemplo n.º 8
0
class WebVTTTestCase(unittest.TestCase):
    """Tests for WebVTT read/write/save/conversion, Caption and Style."""

    def setUp(self):
        """Give every test its own empty WebVTT instance."""
        self.webvtt = WebVTT()

    def _get_file(self, filename):
        """Return the path of ``filename`` inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it, return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _read_output_lines(self, filename):
        """Return the right-stripped lines of a UTF-8 file in OUTPUT_DIR."""
        path = os.path.join(OUTPUT_DIR, filename)
        with open(path, 'r', encoding='utf-8') as handle:
            return [row.rstrip() for row in handle]

    def _make_target_folder(self):
        """Create and return the ``test_folder`` directory under OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    def tearDown(self):
        """Delete OUTPUT_DIR when a test has created it."""
        if not os.path.exists(OUTPUT_DIR):
            return
        rmtree(OUTPUT_DIR)

    def test_create_caption(self):
        """A new Caption exposes its timestamps, their seconds and its lines."""
        expected_text = ['Caption test line 1', 'Caption test line 2']
        # Pass a copy so the final comparison is against an independent list.
        caption = Caption('00:00:00.500', '00:00:07.000', list(expected_text))

        self.assertEqual(caption.start, '00:00:00.500')
        self.assertEqual(caption.start_in_seconds, 0.5)
        self.assertEqual(caption.end, '00:00:07.000')
        self.assertEqual(caption.end_in_seconds, 7)
        self.assertEqual(caption.lines, expected_text)

    def test_write_captions(self):
        """write() emits the read captions plus an appended one to a stream."""
        staged = self._stage_fixture('one_caption.vtt')

        buffer = io.StringIO()
        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.write(buffer)

        # Rewind so the content just written can be read from the start.
        buffer.seek(0)
        lines = [row.rstrip() for row in buffer]

        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_save_captions(self):
        """A caption appended after reading is written back by save()."""
        staged = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        lines = self._read_output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_srt_conversion(self):
        """Loading an SRT file and saving creates the matching .vtt file."""
        staged = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._read_output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT', '',
            '00:00:00.500 --> 00:00:07.000', 'Caption text #1',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_sbv_conversion(self):
        """Loading an SBV file and saving creates the matching .vtt file."""
        staged = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._read_output_lines('two_captions.vtt')
        expected_lines = [
            'WEBVTT', '',
            '00:00:00.378 --> 00:00:11.378', 'Caption text #1', '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)', 'Caption text #2 (line 2)',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_save_to_other_location(self):
        """Saving to a directory writes the file there under its own name."""
        folder = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(folder)

        saved = os.path.join(folder, 'one_caption.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_save_specific_filename(self):
        """Saving to an explicit .vtt path creates exactly that file."""
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        self.assertTrue(os.path.exists(destination))

    def test_save_specific_filename_no_extension(self):
        """Saving to a name without extension is expected to add ``.vtt``."""
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        saved = os.path.join(folder, 'custom_name.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_caption_timestamp_update(self):
        """Assigning new start/end timestamps is reflected when read back."""
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_timestamp_format(self):
        """Timestamps without an hours field are reported with ``00:`` hours."""
        with_hours = Caption('01:02:03.400', '02:03:04.500')
        self.assertEqual(with_hours.start, '01:02:03.400')
        self.assertEqual(with_hours.end, '02:03:04.500')

        without_hours = Caption('02:03.400', '03:04.500')
        self.assertEqual(without_hours.start, '00:02:03.400')
        self.assertEqual(without_hours.end, '00:03:04.500')

    def test_caption_text(self):
        """``text`` joins the caption lines with newlines."""
        cue = Caption(text=['Caption line #1', 'Caption line #2'])

        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        """A multi-line string passed as ``text`` is split into lines."""
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text(self):
        """Assigning ``text`` replaces the caption text."""
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'

        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        """Assigning a multi-line string to ``text`` yields two lines."""
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)

        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        """Assigning a non-string to ``text`` raises AttributeError."""
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        """Items of ``lines`` can be replaced in place."""
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'

        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        """``captions`` is a list after reading a file."""
        self.webvtt.read(self._get_file('sample.vtt'))

        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        """Rebinding ``captions`` raises AttributeError."""
        self.webvtt.read(self._get_file('sample.vtt'))

        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        """The WebVTT object supports indexing and ``len`` over its captions."""
        self.webvtt.read(self._get_file('sample.vtt'))

        first = self.webvtt[0]
        self.assertIsInstance(first, Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        """save() on a WebVTT with no file raises MissingFilenameError."""
        unsaved = WebVTT()

        with self.assertRaises(MissingFilenameError):
            unsaved.save()

    def test_malformed_start_timestamp(self):
        """A start timestamp of ``'01:00'`` raises MalformedCaptionError."""
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')

    def test_set_styles_from_text(self):
        """Assigning ``Style.text`` splits the value into ``lines``."""
        sheet = Style()
        sheet.text = '::cue(b) {\n  color: peachpuff;\n}'

        expected = ['::cue(b) {', '  color: peachpuff;', '}']
        self.assertListEqual(sheet.lines, expected)

    def test_get_styles_as_text(self):
        """``Style.text`` is expected to be the lines stripped and concatenated."""
        sheet = Style()
        sheet.lines = ['::cue(b) {', '  color: peachpuff;', '}']

        self.assertEqual(sheet.text, '::cue(b) {color: peachpuff;}')

    def test_save_identifiers(self):
        """Caption identifiers present in the source survive read + save."""
        staged = self._stage_fixture('using_identifiers.vtt')

        self.webvtt.read(staged)
        self.webvtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'))

        lines = self._read_output_lines('new_using_identifiers.vtt')

        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            'second caption',
            '00:00:07.000 --> 00:00:11.890',
            'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320',
            'Caption text #3',
            '',
            '4',
            '00:00:16.320 --> 00:00:21.580',
            'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880',
            'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280',
            'Caption text #6',
        ]

        self.assertListEqual(lines, expected_lines)

    def test_save_updated_identifiers(self):
        """Added, changed and cleared identifiers are reflected in the saved file."""
        staged = self._stage_fixture('using_identifiers.vtt')

        self.webvtt.read(staged)
        cues = self.webvtt.captions
        cues[0].identifier = 'first caption'
        cues[1].identifier = None
        cues[3].identifier = '44'

        appended = Caption('00:00:27.280', '00:00:29.200', 'Caption text #7')
        appended.identifier = 'last caption'
        self.webvtt.captions.append(appended)
        self.webvtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'))

        lines = self._read_output_lines('new_using_identifiers.vtt')

        expected_lines = [
            'WEBVTT',
            '',
            'first caption',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320',
            'Caption text #3',
            '',
            '44',
            '00:00:16.320 --> 00:00:21.580',
            'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880',
            'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280',
            'Caption text #6',
            '',
            'last caption',
            '00:00:27.280 --> 00:00:29.200',
            'Caption text #7',
        ]

        self.assertListEqual(lines, expected_lines)