Python WebVTT.read Examples

Programming Language: Python

Namespace/Package Name: webvtt

Class/Type: WebVTT

Method/Function: read

Examples at hotexamples.com: 6

Python WebVTT.read - 6 examples found. These are the top-rated real-world Python examples of webvtt.WebVTT.read extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

WebVTT(30)

save(21)

from_srt(8)

read(6)

write(3)

from_sbv(2)

save_as_srt(1)

Example #1

Show file

def read_file(file_name):
    """Read a VTT file and return the segments built from its captions.

    Each caption of the parsed file is passed to ``read_caption``; captions
    for which it returns ``None`` are left out of the returned list.
    """
    converted = (read_caption(entry)
                 for entry in WebVTT.read(file_name).captions)
    return [segment for segment in converted if segment is not None]

Example #2

Show file

class VttToSrtTranscoder:
    """Transcode a ``.vtt`` subtitle file into a ``.srt`` file beside it.

    Args:
        suffix: Optional text appended to the output file stem as
            ``<stem>-<suffix>``. An empty (or ``None``) suffix adds nothing.
        clear: When true, the input file is removed after a successful
            conversion.
        vtt_reader: Object whose ``read(path)`` yields the captions to
            convert. A ``WebVTT()`` instance is created when omitted.
    """

    def __init__(self, suffix='', clear=True, vtt_reader=None):
        self.__suffix = suffix
        self.__clear = clear

        self.__out_ext = '.srt'
        self.__in_ext = '.vtt'

        # Only build the default reader when the caller did not inject one.
        self.__vtt_reader = WebVTT() if vtt_reader is None else vtt_reader

    def get_in_ext(self):
        """Return the extension of accepted input files (``'.vtt'``)."""
        return self.__in_ext

    def get_out_ext(self):
        """Return the extension of produced output files (``'.srt'``)."""
        return self.__out_ext

    def __write_subs(self, out_fd, in_path):
        """Write every caption read from *in_path* to *out_fd* as SubRip items."""
        for index, caption in enumerate(self.__vtt_reader.read(in_path)):
            start = SubRipTime(0, 0, caption.start_in_seconds)
            end = SubRipTime(0, 0, caption.end_in_seconds)
            # SubRip items are numbered from 1; HTML entities in the caption
            # text are decoded before writing.
            item = SubRipItem(index + 1, start, end,
                              html.unescape(caption.text))
            out_fd.write("%s\n" % str(item))

    def __create_out_filename(self, in_file_path):
        """Return *in_file_path* with the optional suffix and the output extension."""
        out_file_path = os.path.splitext(in_file_path)[0]
        # Truthiness check: a None suffix behaves like an empty one instead
        # of raising TypeError from len().
        if self.__suffix:
            out_file_path = "%s-%s" % (out_file_path, self.__suffix)

        return out_file_path + self.__out_ext

    def apply(self, in_vtt_path):
        """Convert *in_vtt_path* and return the path of the written file.

        The input path is made absolute and checked with ``validate_ext``
        against the input extension before anything is written. The input
        file is deleted afterwards when the instance was created with
        ``clear=True``.
        """
        in_vtt_path = os.path.abspath(in_vtt_path)
        validate_ext(in_vtt_path, self.__in_ext)

        out_file_path = self.__create_out_filename(in_vtt_path)

        # Explicit UTF-8: unescaped caption text may contain characters the
        # platform's default encoding cannot represent.
        with open(out_file_path, "w", encoding="utf-8") as out_fd:
            self.__write_subs(out_fd, in_vtt_path)

        if self.__clear:
            os.remove(in_vtt_path)

        return out_file_path

Example #3

Show file

File: TranslateSubtitles.py Project: blueeyes08/KATranslationCheck

# Find VTT file
def vtt_file(out):
    """Return the subtitle file name announced in captured command output.

    Args:
        out: UTF-8 encoded bytes of the captured output.

    Returns:
        The stripped text following ``"Writing video subtitles to: "`` on the
        first line that contains it, or ``None`` when no line does.
    """
    marker = "Writing video subtitles to: "
    for line in out.decode("utf-8").split("\n"):
        # Split on the marker itself rather than the first ":" so that a
        # colon earlier on the line (e.g. a timestamp) cannot corrupt the
        # extracted name.
        _, found, name = line.partition(marker)
        if found:
            return name.strip()
    return None


filename = vtt_file(out)
if not filename:
    # No subtitle file was reported, so there is nothing to translate
    print(red("Video does not seem to have english subs", bold=True))
    sys.exit(1)

# Load the source VTT and render it as one <div> of timed <span> elements
vtt = WebVTT()
vtt.read(filename)

stmp = StringIO()
stmp.write("<div>\n")
for caption in vtt:
    stmp.write('<span data-start="{}" data-end="{}">{}</span>'.format(
        caption.start, caption.end, caption.text))
    stmp.write("\n")
stmp.write("</div>\n")

# Run the generated HTML through the translation driver
driver = TranslationDriver(args.lang)
strans = driver.translate(stmp.getvalue())

# Start a fresh WebVTT object for turning the translated HTML back into VTT
vtt = WebVTT()

Example #4

Show file

class WebVTTTestCase(unittest.TestCase):
    """Tests for reading, saving and converting WebVTT files and for Caption."""

    def setUp(self):
        # Each test gets its own empty WebVTT instance
        self.webvtt = WebVTT()

    def tearDown(self):
        # Remove whatever the test wrote to the output directory
        if os.path.exists(OUTPUT_DIR):
            rmtree(OUTPUT_DIR)

    def _get_file(self, filename):
        """Return the path of *filename* inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it and return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _output_lines(self, filename):
        """Return the right-stripped lines of *filename* inside OUTPUT_DIR."""
        path = os.path.join(OUTPUT_DIR, filename)
        with open(path, 'r', encoding='utf-8') as stream:
            return [raw.rstrip() for raw in stream]

    def _make_target_folder(self):
        """Create and return the 'test_folder' directory inside OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    def test_create_caption(self):
        caption = Caption(
            '00:00:00.500',
            '00:00:07.000',
            ['Caption test line 1', 'Caption test line 2'],
        )

        self.assertEqual(caption.start, '00:00:00.500')
        self.assertEqual(caption.start_in_seconds, 0.5)
        self.assertEqual(caption.end, '00:00:07.000')
        self.assertEqual(caption.end_in_seconds, 7)
        self.assertEqual(
            caption.lines,
            ['Caption test line 1', 'Caption test line 2'],
        )

    def test_save_captions(self):
        staged = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        lines = self._output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_srt_conversion(self):
        staged = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_sbv_conversion(self):
        staged = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._output_lines('two_captions.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.378 --> 00:00:11.378',
            'Caption text #1',
            '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)',
            'Caption text #2 (line 2)',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_save_to_other_location(self):
        target_path = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(target_path)

        saved = os.path.join(target_path, 'one_caption.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_save_specific_filename(self):
        target_path = self._make_target_folder()
        output_file = os.path.join(target_path, 'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(output_file)

        self.assertTrue(os.path.exists(output_file))

    def test_save_specific_filename_no_extension(self):
        target_path = self._make_target_folder()
        output_file = os.path.join(target_path, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(output_file)

        # The .vtt extension is expected to be added on save
        saved = os.path.join(target_path, 'custom_name.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_caption_timestamp_update(self):
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_text(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_supported_formats(self):
        reported = WebVTT().supported_formats()
        expected = [entry[0] for entry in SUPPORTED_FORMATS]
        self.assertListEqual(reported, expected)

    def test_update_text(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'
        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'
        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt[0], Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        webvtt = WebVTT()
        with self.assertRaises(MissingFilenameError):
            webvtt.save()

    def test_malformed_start_timestamp(self):
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')

Example #5

Show file

File: utils.py Project: Eddie-Hwang/KTV-Covid-19

 def read_caption(self, vtt_file):
     """Return the result of ``WebVTT.read`` for the given VTT file."""
     parsed = WebVTT.read(vtt_file)
     return parsed

Example #6

Show file

class WebVTTTestCase(unittest.TestCase):
    """Tests for WebVTT reading/writing/conversion, Caption and Style."""

    def setUp(self):
        # Each test gets its own empty WebVTT instance
        self.webvtt = WebVTT()

    def tearDown(self):
        # Remove whatever the test wrote to the output directory
        if os.path.exists(OUTPUT_DIR):
            rmtree(OUTPUT_DIR)

    def _get_file(self, filename):
        """Return the path of *filename* inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it and return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _output_lines(self, filename):
        """Return the right-stripped lines of *filename* inside OUTPUT_DIR."""
        path = os.path.join(OUTPUT_DIR, filename)
        with open(path, 'r', encoding='utf-8') as stream:
            return [raw.rstrip() for raw in stream]

    def _make_target_folder(self):
        """Create and return the 'test_folder' directory inside OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    def test_create_caption(self):
        caption = Caption(
            '00:00:00.500',
            '00:00:07.000',
            ['Caption test line 1', 'Caption test line 2'],
        )

        self.assertEqual(caption.start, '00:00:00.500')
        self.assertEqual(caption.start_in_seconds, 0.5)
        self.assertEqual(caption.end, '00:00:07.000')
        self.assertEqual(caption.end_in_seconds, 7)
        self.assertEqual(
            caption.lines,
            ['Caption test line 1', 'Caption test line 2'],
        )

    def test_write_captions(self):
        staged = self._stage_fixture('one_caption.vtt')

        out = io.StringIO()
        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.write(out)

        # Rewind so the written content can be read back line by line
        out.seek(0)
        lines = [raw.rstrip() for raw in out]

        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_save_captions(self):
        staged = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(staged)
        extra = Caption(
            '00:00:07.000',
            '00:00:11.890',
            ['New caption text line1', 'New caption text line2'],
        )
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        lines = self._output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_srt_conversion(self):
        staged = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._output_lines('one_caption.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_sbv_conversion(self):
        staged = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(staged)
        self.webvtt.save()

        converted = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(converted))

        lines = self._output_lines('two_captions.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.378 --> 00:00:11.378',
            'Caption text #1',
            '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)',
            'Caption text #2 (line 2)',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_save_to_other_location(self):
        target_path = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(target_path)

        saved = os.path.join(target_path, 'one_caption.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_save_specific_filename(self):
        target_path = self._make_target_folder()
        output_file = os.path.join(target_path, 'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(output_file)

        self.assertTrue(os.path.exists(output_file))

    def test_save_specific_filename_no_extension(self):
        target_path = self._make_target_folder()
        output_file = os.path.join(target_path, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(output_file)

        # The .vtt extension is expected to be added on save
        saved = os.path.join(target_path, 'custom_name.vtt')
        self.assertTrue(os.path.exists(saved))

    def test_caption_timestamp_update(self):
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_timestamp_format(self):
        # Timestamps given with an hours field are reported unchanged
        with_hours = Caption('01:02:03.400', '02:03:04.500')
        self.assertEqual(with_hours.start, '01:02:03.400')
        self.assertEqual(with_hours.end, '02:03:04.500')

        # Timestamps given without hours are reported with a '00:' prefix
        without_hours = Caption('02:03.400', '03:04.500')
        self.assertEqual(without_hours.start, '00:02:03.400')
        self.assertEqual(without_hours.end, '00:03:04.500')

    def test_caption_text(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'
        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'
        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt[0], Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        webvtt = WebVTT()
        with self.assertRaises(MissingFilenameError):
            webvtt.save()

    def test_malformed_start_timestamp(self):
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')

    def test_set_styles_from_text(self):
        style = Style()
        style.text = '::cue(b) {\n  color: peachpuff;\n}'

        expected = ['::cue(b) {', '  color: peachpuff;', '}']
        self.assertListEqual(style.lines, expected)

    def test_get_styles_as_text(self):
        style = Style()
        style.lines = ['::cue(b) {', '  color: peachpuff;', '}']
        self.assertEqual(style.text, '::cue(b) {color: peachpuff;}')

    def test_save_identifiers(self):
        staged = self._stage_fixture('using_identifiers.vtt')

        self.webvtt.read(staged)
        self.webvtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'))

        lines = self._output_lines('new_using_identifiers.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000', 'Caption text #1',
            '',
            'second caption',
            '00:00:07.000 --> 00:00:11.890', 'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320', 'Caption text #3',
            '',
            '4',
            '00:00:16.320 --> 00:00:21.580', 'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880', 'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280', 'Caption text #6',
        ]
        self.assertListEqual(lines, expected_lines)

    def test_save_updated_identifiers(self):
        staged = self._stage_fixture('using_identifiers.vtt')

        self.webvtt.read(staged)
        captions = self.webvtt.captions
        captions[0].identifier = 'first caption'
        captions[1].identifier = None
        captions[3].identifier = '44'

        last_caption = Caption('00:00:27.280', '00:00:29.200',
                               'Caption text #7')
        last_caption.identifier = 'last caption'
        self.webvtt.captions.append(last_caption)
        self.webvtt.save(os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt'))

        lines = self._output_lines('new_using_identifiers.vtt')
        expected_lines = [
            'WEBVTT',
            '',
            'first caption',
            '00:00:00.500 --> 00:00:07.000', 'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890', 'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320', 'Caption text #3',
            '',
            '44',
            '00:00:16.320 --> 00:00:21.580', 'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880', 'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280', 'Caption text #6',
            '',
            'last caption',
            '00:00:27.280 --> 00:00:29.200', 'Caption text #7',
        ]
        self.assertListEqual(lines, expected_lines)