예제 #1
0
 def test_compare_trailings_with_ref(self):
     """Trailing-only cleaning must yield the same text as the reference file."""
     reference_path = os.path.join(self.static_path, 'ref_notrailings.vtt')
     expected = pyvtt.open(reference_path, encoding='utf_8')
     cleaned = pyvtt.open(self.test_trailings_path, encoding='utf_8')
     # Tag and key cleaning stay disabled so that only the trailing option
     # is exercised by this test.
     cleaned.clean_text(tags=False, keys=False, trailing=True)
     self.assertEqual(expected.text, cleaned.text)
예제 #2
0
 def test_compare_replacements_with_ref(self):
     """Replacing '&' and '+' must yield the same text as the reference file."""
     reference_path = os.path.join(self.static_path, 'ref_replacements.vtt')
     expected = pyvtt.open(reference_path, encoding='utf_8')
     replaced = pyvtt.open(self.test_replacements_path, encoding='utf_8')
     # Two substitutions are applied: '&' -> 'and' and '+' -> 'plus'.
     substitutions = {'&': 'and', '+': 'plus'}
     replaced.apply_replacements(replacements=substitutions)
     self.assertEqual(expected.text, replaced.text)
예제 #3
0
    def test_eol_preservation(self):
        """Check that saving with the input's end-of-line keeps that end-of-line.

        For each of LF, CR and CRLF a one-cue file is written, its newline
        style is detected, the file is re-saved through pyvtt with that
        style, and the newline style of the saved file is compared with the
        detected one. The temporary file is removed after every iteration,
        including when an assertion fails.
        """
        self.temp_eol_path = os.path.join(self.static_path,
                                          'temp_eol_preserv.vtt')
        end_of_lines = ['\n', '\r', '\r\n']
        enc = 'utf-8'

        for eols in end_of_lines:
            cue = ('00:01:00,000 --> 00:02:00,000' + eols +
                   'TestEOLPreservation' + eols)
            # Binary mode keeps the EOL bytes exactly as given, so the
            # payload has to be encoded to bytes before writing.
            with open(self.temp_eol_path, 'wb') as input_eol:
                input_eol.write(cue.encode(enc))

            try:
                # Plain text mode already performs universal-newline
                # detection and records it in ``newlines``; the legacy 'U'
                # flag is rejected by Python 3.11+.
                with open(self.temp_eol_path, 'r',
                          encoding=enc) as input_file:
                    input_file.read()
                    detected_eol = input_file.newlines
                self.assertEqual(eols, detected_eol)

                vtt_file = pyvtt.open(self.temp_eol_path, encoding=enc)
                vtt_file.save(self.temp_eol_path, eol=detected_eol)

                with open(self.temp_eol_path, 'r',
                          encoding=enc) as output_file:
                    output_file.read()
                    saved_eol = output_file.newlines
                self.assertEqual(saved_eol, detected_eol)
            finally:
                os.remove(self.temp_eol_path)
예제 #4
0
 def test_windows1252(self):
     """Open the windows-1252 fixture and reject the UTF-8 fixture as ASCII."""
     subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')
     self.assertEqual(len(subtitles), 1332)
     self.assertEqual(subtitles.eol, '\r\n')
     # Decoding the UTF-8 fixture as plain ASCII is expected to fail.
     with self.assertRaises(UnicodeDecodeError):
         pyvtt.open(self.utf8_path, encoding='ascii')
예제 #5
0
    def test_compare_shift_with_ref(self):
        """Shift forward, shift back, then shift by ratio, checking each step."""
        shifted_reference = pyvtt.open(self.ref_dur_shifted_path,
                                       encoding='utf_8')
        shifted = pyvtt.open(self.test_duration_path, encoding='utf_8')
        untouched = pyvtt.open(self.test_duration_path, encoding='utf_8')
        ratio_reference = pyvtt.open(
            os.path.join(self.static_path, 'ref_duration_ratio.vtt'),
            encoding='utf_8')

        # Forward by 5 hours, 5 minutes, 5 seconds and 500 milliseconds.
        offset = {'hours': 5, 'minutes': 5, 'seconds': 5, 'milliseconds': 500}
        shifted.shift(**offset)
        self.assertEqual(shifted, shifted_reference)

        # Back by the same amount: must equal a freshly opened copy.
        shifted.shift(**{unit: -amount for unit, amount in offset.items()})
        self.assertEqual(shifted, untouched)

        # Finally a shift with a ratio of 2.
        shifted.shift(ratio=2)
        self.assertEqual(shifted, ratio_reference)
예제 #6
0
    def test_eol_conversion(self):
        """Saving the CRLF windows-1252 fixture with ``eol='\\n'`` must give LF."""
        # Plain text mode performs universal-newline detection on its own;
        # the legacy 'U' flag is rejected by Python 3.11+. Context managers
        # make sure both handles are closed.
        with open(self.windows_path, 'r',
                  encoding='windows-1252') as input_file:
            input_file.read()
            self.assertEqual(input_file.newlines, '\r\n')

        vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
        vtt_file.save(self.temp_path, eol='\n')

        with open(self.temp_path, 'r',
                  encoding='windows-1252') as output_file:
            output_file.read()
            self.assertEqual(output_file.newlines, '\n')
예제 #7
0
    def test_eol_conversion(self):
        """Check that a CRLF input saved with ``eol='\\n'`` is written with LF."""
        source_encoding = 'windows-1252'

        # The 'U' mode flag is rejected by Python 3.11+; default text mode
        # already detects the newline style. ``with`` closes each handle.
        with open(self.windows_path, 'r',
                  encoding=source_encoding) as input_file:
            input_file.read()
            self.assertEqual(input_file.newlines, '\r\n')

        vtt_file = pyvtt.open(self.windows_path, encoding=source_encoding)
        vtt_file.save(self.temp_path, eol='\n')

        with open(self.temp_path, 'r',
                  encoding=source_encoding) as output_file:
            output_file.read()
            self.assertEqual(output_file.newlines, '\n')
예제 #8
0
 def test_save_with_indexes(self):
     """Saving with ``include_indexes=True`` must match the stored fixture.

     The generated file is compared byte-for-byte with
     ``file_with_indexes.vtt`` and removed afterwards, even when the
     comparison fails.
     """
     subtitles = pyvtt.open(os.path.join(self.static_path, 'no-indexes.srt'))
     subtitles.clean_indexes()
     file_with_indexes = os.path.join(file_path, 'tests', 'vtt_test',
                                      'file_with_indexes.vtt')
     file_with_indexes_target_path = os.path.join(
         file_path, 'tests', 'vtt_test', 'file_with_indexes_target.vtt')
     subtitles.save(file_with_indexes_target_path, include_indexes=True)
     try:
         # Read both files through context managers so that no handle is
         # still open when the target is deleted.
         with open(file_with_indexes, 'rb') as expected_file:
             expected = expected_file.read()
         with open(file_with_indexes_target_path, 'rb') as saved_file:
             saved = saved_file.read()
         self.assertEqual(expected, saved)
     finally:
         os.remove(file_with_indexes_target_path)
예제 #9
0
    def test_save_overwrite(self):
        """Saving twice to the same target must fully overwrite the first save.

        Each source file is saved to the shared target with its own EOL and
        encoding, and the target is then compared byte-for-byte with that
        source. The target is removed at the end, also on failure.
        """
        overwrite_source_path1 = os.path.join(file_path, 'tests', 'vtt_test',
                                              'overwrite_source1.vtt')
        overwrite_source_path2 = os.path.join(file_path, 'tests', 'vtt_test',
                                              'overwrite_source2.vtt')
        overwrite_target_path = os.path.join(file_path, 'tests', 'vtt_test',
                                             'overwrite_target.vtt')

        def read_bytes(path):
            # Close the handle right away so the target can be rewritten
            # and deleted afterwards.
            with open(path, 'rb') as handle:
                return handle.read()

        try:
            for source_path in (overwrite_source_path1,
                                overwrite_source_path2):
                vtt_file = pyvtt.open(source_path, encoding='utf-8')
                vtt_file.save(overwrite_target_path,
                              eol=vtt_file._eol,
                              encoding=vtt_file.encoding)
                self.assertEqual(read_bytes(source_path),
                                 read_bytes(overwrite_target_path))
        finally:
            # The target may not exist if the very first save failed.
            if os.path.exists(overwrite_target_path):
                os.remove(overwrite_target_path)
예제 #10
0
    def test_compare_slice_with_ref(self):
        """Slice with one valid window and several windows whose save must fail.

        The valid window (after 20 s, before 42 s) is compared with the
        sliced reference file; every other window is expected to raise
        ``InvalidFile`` on save.
        """
        expected = pyvtt.open(self.ref_dur_sliced_path, encoding='utf_8')
        source = pyvtt.open(self.test_duration_path, encoding='utf_8')
        scratch_path = os.path.join(self.static_path, 'temp_test.vtt')

        # NOTE(review): each os.remove after a failing save presumes that
        # save() leaves a file at scratch_path before raising - confirm.
        sliced = source.slice(starts_after={'minutes': 2})
        self.assertRaises(InvalidFile, sliced.save, scratch_path)
        os.remove(scratch_path)

        sliced = source.slice(starts_after={'seconds': 20},
                              ends_before={'seconds': 42})
        sliced.save(scratch_path, eol='\n', encoding='utf_8')
        self.assertEqual(sliced, expected)
        os.remove(scratch_path)

        failing_windows = (
            # Negative bounds.
            {'starts_after': {'seconds': -20},
             'ends_before': {'seconds': -42}},
            # ends_before > ends_after
            {'ends_before': {'seconds': 42},
             'ends_after': {'seconds': 40}},
            # starts_before < starts_after
            {'starts_before': {'seconds': 10},
             'starts_after': {'seconds': 30}},
            # starts_after > ends_before
            {'starts_after': {'seconds': 42},
             'ends_before': {'seconds': 30}},
        )
        for window in failing_windows:
            sliced = source.slice(**window)
            self.assertRaises(InvalidFile, sliced.save, scratch_path)
            os.remove(scratch_path)
예제 #11
0
 def setUp(self):
     """Resolve the fixture paths under tests/vtt_test and open the reference."""
     self.static_path = os.path.join(file_path, 'tests', 'vtt_test')

     # Attribute name -> fixture file name inside ``self.static_path``.
     fixtures = (
         ('ref_path', 'ref.vtt'),
         ('ref_dur_shifted_path', 'ref_duration_shifted.vtt'),
         ('ref_dur_sliced_path', 'ref_duration_sliced.vtt'),
         ('test_tags_path', 'test_tags.vtt'),
         ('test_keys_path', 'test_keys.vtt'),
         ('test_trailings_path', 'test_trailings.vtt'),
         ('test_duration_path', 'test_duration.vtt'),
         ('test_replacements_path', 'test_replacements.vtt'),
     )
     for attribute, filename in fixtures:
         setattr(self, attribute, os.path.join(self.static_path, filename))

     # Reference file used by the comparison tests.
     self.vtt_file_ref = pyvtt.open(self.ref_path, encoding='utf_8')
예제 #12
0
 def test_missing_indexes(self):
     """The fixture without cue indexes must still load as 7 items."""
     items = pyvtt.open(os.path.join(self.base_path, 'no-indexes.srt'))
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12+.
     self.assertEqual(len(items), 7)
예제 #13
0
def subtitles2timestamps(input_path):
    """Return one ``(text, start, end)`` tuple per caption in ``input_path``.

    ``start`` and ``end`` are the caption's start/end ``ordinal`` values
    divided by 1000.
    """
    timestamps = []
    for caption in pyvtt.open(input_path):
        entry = (caption.text, caption.start.ordinal / 1000,
                 caption.end.ordinal / 1000)
        timestamps.append(entry)
    return timestamps
예제 #14
0
 def test_save_empty_slice(self):
     """A slice with no items must have length 0 and refuse to be saved."""
     subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')
     zero = (0, 0, 0, 0)
     empty_slice = subtitles.slice(starts_after=zero, ends_before=zero)
     self.assertEqual(len(empty_slice), 0)
     with self.assertRaises(InvalidFile):
         empty_slice.save(self.temp_path)
예제 #15
0
 def test_compare_from_string_and_from_path(self):
     """Items parsed from a path and from the same text must stringify equally."""
     # Read through a context manager so the codecs handle is closed
     # instead of being left to the garbage collector.
     with codecs.open(self.utf8_path, encoding='utf_8') as source:
         unicode_content = source.read()
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
예제 #16
0
 def test_missing_indexes(self):
     """Opening 'no-indexes.srt' must produce 7 items."""
     items = pyvtt.open(os.path.join(self.base_path, 'no-indexes.srt'))
     # Use assertEqual: the assertEquals alias is deprecated and was
     # removed in Python 3.12.
     self.assertEqual(len(items), 7)
예제 #17
0
 def test_length(self):
     """The 'capability_tester.srt' fixture must load as 37 items."""
     fixture_path = os.path.join(self.base_path, 'capability_tester.srt')
     subtitles = pyvtt.open(fixture_path)
     self.assertEqual(len(subtitles), 37)
예제 #18
0
 def setUp(self):
     """Open the 'utf-8.vtt' fixture and keep it on ``self.file``."""
     fixture_path = os.path.join(file_path, 'tests', 'static', 'utf-8.vtt')
     self.file = pyvtt.open(fixture_path)
예제 #19
0
 def test_save_empty_slice(self):
     """Saving a zero-length slice must raise ``InvalidFile``."""
     source = pyvtt.open(self.windows_path, encoding='windows-1252')
     origin = (0, 0, 0, 0)
     nothing = source.slice(starts_after=origin, ends_before=origin)
     self.assertEqual(len(nothing), 0)
     with self.assertRaises(InvalidFile):
         nothing.save(self.temp_path)
예제 #20
0
 def test_save_new_eol_and_encoding(self):
     """Re-saving the windows-1252 file as LF/UTF-8 must equal the UTF-8 fixture."""
     vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
     vtt_file.save(self.temp_path, eol='\n', encoding='utf-8')
     try:
         # Context managers close both handles before the temp file is
         # deleted; the finally clause cleans up even on a mismatch.
         with open(self.temp_path, 'rb') as saved_file:
             saved = saved_file.read()
         with open(self.utf8_path, 'rb') as expected_file:
             expected = expected_file.read()
         self.assertEqual(saved, expected)
     finally:
         os.remove(self.temp_path)
예제 #21
0
 def test_compare_from_string_and_from_path(self):
     """Parsing the UTF-8 fixture by path and by content must give equal items."""
     # ``with`` guarantees the codecs handle is closed after reading.
     with codecs.open(self.utf8_path, encoding='utf_8') as utf8_file:
         unicode_content = utf8_file.read()
     iterator = zip(pyvtt.open(self.utf8_path),
                    pyvtt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
예제 #22
0
 def test_compare_tags_with_ref(self):
     """Tag-only cleaning must yield the same text as the reference file."""
     cleaned = pyvtt.open(self.test_tags_path, encoding='utf_8')
     # Only tag removal is switched on; keys and trailing stay disabled.
     cleaning_options = {'tags': True, 'keys': False, 'trailing': False}
     cleaned.clean_text(**cleaning_options)
     self.assertEqual(self.vtt_file_ref.text, cleaned.text)
예제 #23
0
 def test_utf8(self):
     """Check item count and encoding of the UTF-8 fixture, and that the
     windows fixture fails to decode when opened without an encoding."""
     item_count = len(pyvtt.open(self.utf8_path))
     self.assertEqual(item_count, 1332)
     reported_encoding = pyvtt.open(self.utf8_path).encoding
     self.assertEqual(reported_encoding, 'utf_8')
     with self.assertRaises(UnicodeDecodeError):
         pyvtt.open(self.windows_path)
예제 #24
0
 def __test_encoding(self, encoding):
     """Open the fixture named ``encoding`` under ``self.base_path`` and
     check that it holds 7 items, the first one having index 1."""
     fixture_path = os.path.join(self.base_path, encoding)
     subtitles = pyvtt.open(fixture_path)
     self.assertEqual(len(subtitles), 7)
     first_item = subtitles[0]
     self.assertEqual(first_item.index, 1)
예제 #25
0
def subtitles2text(input_path):
    """Return the adjusted text of every caption in ``input_path``, one per line."""
    adjusted_lines = [
        adjust_caption_text(caption.text)
        for caption in pyvtt.open(input_path)
    ]
    return '\n'.join(adjusted_lines)
예제 #26
0
 def test_utf8(self):
     """The UTF-8 fixture must load with 1332 items and report 'utf_8'; the
     windows fixture must raise ``UnicodeDecodeError`` without an encoding."""
     utf8_items = pyvtt.open(self.utf8_path)
     self.assertEqual(len(utf8_items), 1332)
     reopened = pyvtt.open(self.utf8_path)
     self.assertEqual(reopened.encoding, 'utf_8')
     with self.assertRaises(UnicodeDecodeError):
         pyvtt.open(self.windows_path)
예제 #27
0
 def test_empty_file(self):
     """Opening '/dev/null' with ERROR_RAISE handling must give zero items."""
     empty = pyvtt.open('/dev/null',
                        error_handling=WebVTTFile.ERROR_RAISE)
     item_count = len(empty)
     self.assertEqual(item_count, 0)
예제 #28
0
 def test_windows1252(self):
     """The windows-1252 fixture must load with 1332 items and CRLF line
     ends; the UTF-8 fixture must not decode as ASCII."""
     windows_file = pyvtt.open(self.windows_path, encoding='windows-1252')
     self.assertEqual(len(windows_file), 1332)
     self.assertEqual(windows_file.eol, '\r\n')
     with self.assertRaises(UnicodeDecodeError):
         pyvtt.open(self.utf8_path, encoding='ascii')
예제 #29
0
 def setUp(self):
     """Load 'tests/static/utf-8.vtt' into ``self.file`` for the tests."""
     path_parts = (file_path, 'tests', 'static', 'utf-8.vtt')
     self.file = pyvtt.open(os.path.join(*path_parts))
예제 #30
0
 def test_save(self):
     """Saving the windows-1252 file as LF/UTF-8 must match the UTF-8 fixture."""
     vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
     vtt_file.save(self.temp_path, eol='\n', encoding='utf-8')
     try:
         # Both files are read and closed before the comparison; the
         # temp file is removed whether or not the contents match.
         with open(self.temp_path, 'rb') as written:
             written_bytes = written.read()
         with open(self.utf8_path, 'rb') as reference:
             reference_bytes = reference.read()
         self.assertEqual(written_bytes, reference_bytes)
     finally:
         os.remove(self.temp_path)
예제 #31
0
 def __test_encoding(self, encoding):
     """Check the fixture named ``encoding`` in ``self.base_path``: it must
     contain 7 items and its first item must carry index 1."""
     loaded = pyvtt.open(os.path.join(self.base_path, encoding))
     item_count = len(loaded)
     self.assertEqual(item_count, 7)
     self.assertEqual(loaded[0].index, 1)
예제 #32
0
 def test_length(self):
     """Opening 'capability_tester.srt' must give 37 items."""
     loaded = pyvtt.open(
         os.path.join(self.base_path, 'capability_tester.srt'))
     self.assertEqual(len(loaded), 37)
    def vtt2bcc(path, threshold=0.1, word=True):
        """Convert VTT subtitles into a "bcc" caption dictionary.

        Args:
            path: Path of a subtitle file. When no such path exists the
                value itself is parsed as subtitle text. A falsy value is
                treated as an empty string.
            threshold: Duration in seconds. In word mode a word that ends
                within ``threshold`` of the current start is merged into the
                previous caption, and an untagged cue shorter than
                ``threshold`` starts where the previous caption ended.
            word: When true, cues are split on inline
                ``<timestamp><c>text</c>`` tags; when false, one caption is
                produced per cue from the last line of its tag-free text.

        Returns:
            A dict with styling keys and the caption list under ``"body"``
            (each caption has ``from``, ``to``, ``location``, ``content``),
            or ``{}`` when the parsed subtitles are empty.
        """
        path = path if path else ""
        if os.path.exists(path):
            subs = pyvtt.open(path)
        else:
            subs = pyvtt.from_string(path)

        caption_list = []
        if not word:
            caption_list = [{
                "from": sub.start.ordinal / 1000,
                "to": sub.end.ordinal / 1000,
                "location": 2,
                "content": sub.text_without_tags.split("\n")[-1],
            } for sub in subs]
        else:
            # NOTE: split the bcc captions following the VTT word-level tags.
            # Compiled once, outside the per-cue loop.
            word_tag = re.compile(r"<(.*?)><c>(.*?)</c>")
            for sub in subs:
                text = sub.text

                start = sub.start.ordinal / 1000
                end = sub.end.ordinal / 1000
                try:
                    # Raises ValueError when the cue has no inline tag.
                    idx = text.index("<")
                    pre_text = text[:idx]
                    for t_str, match in word_tag.findall(text):
                        pre_text += match
                        # Raises ValueError on a malformed timestamp.
                        t = datetime.strptime(t_str, r"%H:%M:%S.%f")
                        # ``microsecond`` is always expressed in millionths
                        # of a second, so divide by 1e6; scaling by the
                        # number of digits breaks values such as ".050".
                        sec = (t.hour * 3600 + t.minute * 60 + t.second +
                               t.microsecond / 1e6)
                        final_text = pre_text.split("\n")[-1]

                        if caption_list and (sec - start <= threshold
                                             or caption_list[-1]["content"]
                                             == final_text):
                            caption_list[-1].update({
                                "to": sec,
                                "content": final_text,
                            })
                        else:
                            caption_list.append({
                                "from": start,
                                "to": sec,
                                "location": 2,
                                "content": final_text,
                            })
                        start = sec
                except ValueError:
                    # Untagged (or unparsable) cue: fall back to one caption
                    # for the cue's last line.
                    final_text = sub.text.split("\n")[-1]
                    if caption_list and caption_list[-1][
                            "content"] == final_text:
                        caption_list[-1].update({
                            "to": end,
                            "content": final_text,
                        })
                    else:
                        if caption_list and end - start < threshold:
                            start = caption_list[-1]["to"]
                        caption_list.append({
                            "from": start,
                            "to": end,
                            "location": 2,
                            "content": final_text,
                        })

        # NOTE: avoid running past the end of the video by shortening the
        # last caption. Guarded because there may be no captions at all.
        if caption_list:
            last = caption_list[-1]
            last["to"] = last.get("from") + 0.1
        bcc = {
            "font_size": 0.4,
            "font_color": "#FFFFFF",
            "background_alpha": 0.5,
            "background_color": "#9C27B0",
            "Stroke": "none",
            "body": caption_list,
        }

        return bcc if subs else {}