def test_compare_trailings_with_ref(self):
    """Check that trailing-whitespace cleanup reproduces the reference text.

    Opens ``ref_notrailings.vtt`` from ``self.static_path`` as the expected
    result, runs ``clean_text`` on the file at ``self.test_trailings_path``
    with only ``trailing`` enabled, and compares the ``text`` of both files.
    """
    expected = pyvtt.open(
        os.path.join(self.static_path, 'ref_notrailings.vtt'),
        encoding='utf_8')
    cleaned = pyvtt.open(self.test_trailings_path, encoding='utf_8')

    # Tag and key removal stay off so only the trailing cleanup is exercised.
    cleaned.clean_text(tags=False, keys=False, trailing=True)

    self.assertEqual(expected.text, cleaned.text)
def test_compare_replacements_with_ref(self):
    """Check that ``apply_replacements`` reproduces the reference text.

    Opens ``ref_replacements.vtt`` from ``self.static_path`` as the expected
    result, applies the ``'&' -> 'and'`` and ``'+' -> 'plus'`` replacements
    to the file at ``self.test_replacements_path``, and compares the ``text``
    of both files.
    """
    expected = pyvtt.open(
        os.path.join(self.static_path, 'ref_replacements.vtt'),
        encoding='utf_8')
    replaced = pyvtt.open(self.test_replacements_path, encoding='utf_8')

    substitutions = {'&': 'and', '+': 'plus'}
    replaced.apply_replacements(replacements=substitutions)

    self.assertEqual(expected.text, replaced.text)
def test_eol_preservation(self):
    """Check that saving with the input's end-of-line sequence preserves it.

    For each of LF, CR and CRLF a small cue file is written to
    ``self.temp_eol_path``, its newline style is detected, the file is
    re-saved through ``pyvtt`` with that style, and the newline style of
    the result is compared with the input's.  The temporary file is removed
    after every iteration, even when an assertion fails.
    """
    self.temp_eol_path = os.path.join(self.static_path,
                                      'temp_eol_preserv.vtt')
    enc = 'utf-8'

    def detect_newlines(path):
        # Default text mode already translates newlines; the old 'U' flag
        # is rejected by Python 3.11+.  `newlines` is only populated once
        # the content has been read.
        with open(path, 'r', encoding=enc) as handle:
            handle.read()
            return handle.newlines

    for eol in ('\n', '\r', '\r\n'):
        content = ('00:01:00,000 --> 00:02:00,000' + eol +
                   'TestEOLPreservation' + eol)
        try:
            # The file is opened in binary mode so the EOL bytes are written
            # verbatim; binary files need bytes, hence the explicit encode.
            with open(self.temp_eol_path, 'wb') as raw_file:
                raw_file.write(content.encode(enc))

            input_newlines = detect_newlines(self.temp_eol_path)
            self.assertEqual(eol, input_newlines)

            vtt_file = pyvtt.open(self.temp_eol_path, encoding=enc)
            vtt_file.save(self.temp_eol_path, eol=input_newlines)

            self.assertEqual(detect_newlines(self.temp_eol_path),
                             input_newlines)
        finally:
            if os.path.exists(self.temp_eol_path):
                os.remove(self.temp_eol_path)
def test_windows1252(self):
    """Check decoding of the windows-1252 fixture.

    The file at ``self.windows_path`` must yield 1332 items and report a
    CRLF ``eol``; opening the file at ``self.utf8_path`` as ASCII must raise
    ``UnicodeDecodeError``.
    """
    subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')

    self.assertEqual(len(subtitles), 1332)
    self.assertEqual(subtitles.eol, '\r\n')

    with self.assertRaises(UnicodeDecodeError):
        pyvtt.open(self.utf8_path, encoding='ascii')
def test_compare_shift_with_ref(self):
    """Check ``shift`` by offset, by the opposite offset, and by ratio.

    The file at ``self.test_duration_path`` is shifted forward by 5 hours,
    5 minutes, 5 seconds and 500 milliseconds and compared with the shifted
    reference; shifted back by the same amount and compared with an
    untouched copy; then shifted with ``ratio=2`` and compared with
    ``ref_duration_ratio.vtt``.
    """
    shifted_reference = pyvtt.open(self.ref_dur_shifted_path,
                                   encoding='utf_8')
    under_test = pyvtt.open(self.test_duration_path, encoding='utf_8')
    untouched = pyvtt.open(self.test_duration_path, encoding='utf_8')
    ratio_reference = pyvtt.open(
        os.path.join(self.static_path, 'ref_duration_ratio.vtt'),
        encoding='utf_8')

    offset = {'hours': 5, 'minutes': 5, 'seconds': 5, 'milliseconds': 500}

    # Forward shift must match the pre-shifted reference file.
    under_test.shift(**offset)
    self.assertEqual(under_test, shifted_reference)

    # Shifting back by the negated offset must restore the original timings.
    under_test.shift(**{unit: -amount for unit, amount in offset.items()})
    self.assertEqual(under_test, untouched)

    # Ratio-based shift, compared with its own reference file.
    under_test.shift(ratio=2)
    self.assertEqual(under_test, ratio_reference)
def test_eol_conversion(self):
    """Check that saving with ``eol='\\n'`` converts CRLF input to LF.

    The fixture at ``self.windows_path`` must be detected as CRLF; after it
    is saved to ``self.temp_path`` with ``eol='\\n'`` the output must be
    detected as LF.
    """
    def detect_newlines(path):
        # Default text mode already translates newlines; the old 'U' flag
        # is rejected by Python 3.11+.  `newlines` is only populated once
        # the content has been read.  The handle is closed on exit.
        with open(path, 'r', encoding='windows-1252') as handle:
            handle.read()
            return handle.newlines

    self.assertEqual(detect_newlines(self.windows_path), '\r\n')

    vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
    vtt_file.save(self.temp_path, eol='\n')

    self.assertEqual(detect_newlines(self.temp_path), '\n')
def test_eol_conversion(self):
    """Check that saving with ``eol='\\n'`` converts CRLF input to LF.

    The fixture at ``self.windows_path`` must be detected as CRLF; after it
    is saved to ``self.temp_path`` with ``eol='\\n'`` the output must be
    detected as LF.
    """
    def detect_newlines(path):
        # Default text mode already translates newlines; the old 'U' flag
        # is rejected by Python 3.11+.  `newlines` is only populated once
        # the content has been read.  The handle is closed on exit.
        with open(path, 'r', encoding='windows-1252') as handle:
            handle.read()
            return handle.newlines

    self.assertEqual(detect_newlines(self.windows_path), '\r\n')

    vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
    vtt_file.save(self.temp_path, eol='\n')

    self.assertEqual(detect_newlines(self.temp_path), '\n')
def test_save_with_indexes(self):
    """Check that saving with ``include_indexes=True`` matches the fixture.

    ``no-indexes.srt`` is opened, ``clean_indexes`` is called, and the
    result is saved with indexes to a target file whose bytes must equal
    those of ``file_with_indexes.vtt``.  The target file is removed
    afterwards, even when the comparison fails.
    """
    def read_bytes(path):
        # Context manager so the handle is closed deterministically.
        with open(path, 'rb') as handle:
            return handle.read()

    subtitles = pyvtt.open(os.path.join(self.static_path, 'no-indexes.srt'))
    subtitles.clean_indexes()

    fixture_dir = os.path.join(file_path, 'tests', 'vtt_test')
    expected_path = os.path.join(fixture_dir, 'file_with_indexes.vtt')
    target_path = os.path.join(fixture_dir, 'file_with_indexes_target.vtt')

    try:
        subtitles.save(target_path, include_indexes=True)
        self.assertEqual(read_bytes(expected_path), read_bytes(target_path))
    finally:
        if os.path.exists(target_path):
            os.remove(target_path)
def test_save_overwrite(self):
    """Check that saving onto an existing target fully replaces its content.

    Two source files are saved in turn to the same target path, each with
    its own ``_eol`` and ``encoding``; after each save the target's bytes
    must equal the bytes of the source just saved.  The target file is
    removed afterwards, even when an assertion fails.
    """
    def read_bytes(path):
        # Context manager so the handle is closed deterministically.
        with open(path, 'rb') as handle:
            return handle.read()

    fixture_dir = os.path.join(file_path, 'tests', 'vtt_test')
    source_paths = (
        os.path.join(fixture_dir, 'overwrite_source1.vtt'),
        os.path.join(fixture_dir, 'overwrite_source2.vtt'),
    )
    overwrite_target_path = os.path.join(fixture_dir, 'overwrite_target.vtt')

    try:
        # The second pass writes over the output of the first one.
        for source_path in source_paths:
            vtt_file = pyvtt.open(source_path, encoding='utf-8')
            vtt_file.save(overwrite_target_path, eol=vtt_file._eol,
                          encoding=vtt_file.encoding)
            self.assertEqual(read_bytes(source_path),
                             read_bytes(overwrite_target_path))
    finally:
        if os.path.exists(overwrite_target_path):
            os.remove(overwrite_target_path)
def test_compare_slice_with_ref(self):
    """Check ``slice`` against a reference and against rejected bounds.

    One slice (starts after 20 s, ends before 42 s) is saved and must equal
    the file at ``self.ref_dur_sliced_path``.  For every other set of bounds
    used here, saving the sliced file must raise ``InvalidFile``.  The
    scratch file is removed after every save attempt.
    """
    reference = pyvtt.open(self.ref_dur_sliced_path, encoding='utf_8')
    source = pyvtt.open(self.test_duration_path, encoding='utf_8')
    scratch_path = os.path.join(self.static_path, 'temp_test.vtt')

    def assert_save_rejected(**bounds):
        # Saving the slice must raise InvalidFile.  The scratch file is then
        # removed unconditionally, so save presumably leaves it on disk even
        # when it raises -- TODO confirm against pyvtt's save().
        sliced = source.slice(**bounds)
        self.assertRaises(InvalidFile, sliced.save, scratch_path)
        os.remove(scratch_path)

    assert_save_rejected(starts_after={'minutes': 2})

    # The one slice that is expected to save and match the reference file.
    matching = source.slice(starts_after={'seconds': 20},
                            ends_before={'seconds': 42})
    matching.save(scratch_path, eol='\n', encoding='utf_8')
    self.assertEqual(matching, reference)
    os.remove(scratch_path)

    # Negative bounds.
    assert_save_rejected(starts_after={'seconds': -20},
                         ends_before={'seconds': -42})
    # ends_before > ends_after
    assert_save_rejected(ends_before={'seconds': 42},
                         ends_after={'seconds': 40})
    # starts_before < starts_after
    assert_save_rejected(starts_before={'seconds': 10},
                         starts_after={'seconds': 30})
    # starts_after > ends_before
    assert_save_rejected(starts_after={'seconds': 42},
                         ends_before={'seconds': 30})
def setUp(self):
    """Resolve the fixture paths under ``tests/vtt_test`` and load the reference.

    Sets ``self.static_path``, one path attribute per fixture file, and
    ``self.vtt_file_ref`` (``ref.vtt`` opened as UTF-8).
    """
    self.static_path = os.path.join(file_path, 'tests', 'vtt_test')

    def fixture(name):
        # Build the path of a fixture file inside the static directory.
        return os.path.join(self.static_path, name)

    self.ref_path = fixture('ref.vtt')
    self.ref_dur_shifted_path = fixture('ref_duration_shifted.vtt')
    self.ref_dur_sliced_path = fixture('ref_duration_sliced.vtt')
    self.test_tags_path = fixture('test_tags.vtt')
    self.test_keys_path = fixture('test_keys.vtt')
    self.test_trailings_path = fixture('test_trailings.vtt')
    self.test_duration_path = fixture('test_duration.vtt')
    self.test_replacements_path = fixture('test_replacements.vtt')

    # Reference file the tag/key cleanup tests compare against.
    self.vtt_file_ref = pyvtt.open(self.ref_path, encoding='utf_8')
def test_missing_indexes(self):
    """Check that ``no-indexes.srt`` from ``self.base_path`` yields 7 items."""
    items = pyvtt.open(os.path.join(self.base_path, 'no-indexes.srt'))
    # assertEqual replaces the assertEquals alias, which was deprecated and
    # then removed from unittest in Python 3.12.
    self.assertEqual(len(items), 7)
def subtitles2timestamps(input_path):
    """Return ``(text, start, end)`` tuples for every caption in a file.

    ``start`` and ``end`` are the captions' ``start.ordinal`` and
    ``end.ordinal`` divided by 1000 (presumably milliseconds converted to
    seconds -- confirm against pyvtt's time type).
    """
    timestamps = []
    for caption in pyvtt.open(input_path):
        entry = (
            caption.text,
            caption.start.ordinal / 1000,
            caption.end.ordinal / 1000,
        )
        timestamps.append(entry)
    return timestamps
def test_save_empty_slice(self):
    """Check that an empty slice has length 0 and cannot be saved.

    The windows-1252 fixture is sliced with all-zero ``starts_after`` and
    ``ends_before`` bounds; the result must be empty and saving it to
    ``self.temp_path`` must raise ``InvalidFile``.
    """
    zero_time = (0, 0, 0, 0)
    subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')
    empty_slice = subtitles.slice(starts_after=zero_time,
                                  ends_before=zero_time)

    self.assertEqual(len(empty_slice), 0)
    with self.assertRaises(InvalidFile):
        empty_slice.save(self.temp_path)
def test_compare_from_string_and_from_path(self):
    """Check that ``open`` and ``from_string`` parse the same items.

    The UTF-8 fixture is parsed once from its path and once from its decoded
    content; the items are paired with ``zip`` and their ``str`` forms must
    match pairwise.
    """
    # Context manager so the handle is closed instead of being left to the
    # garbage collector.
    with codecs.open(self.utf8_path, encoding='utf_8') as source:
        unicode_content = source.read()

    iterator = zip(pyvtt.open(self.utf8_path),
                   pyvtt.from_string(unicode_content))
    for file_item, string_item in iterator:
        self.assertEqual(str(file_item), str(string_item))
def test_missing_indexes(self):
    """Check that ``no-indexes.srt`` from ``self.base_path`` yields 7 items."""
    items = pyvtt.open(os.path.join(self.base_path, 'no-indexes.srt'))
    # assertEqual replaces the assertEquals alias, which was deprecated and
    # then removed from unittest in Python 3.12.
    self.assertEqual(len(items), 7)
def test_length(self):
    """Check that ``capability_tester.srt`` from ``self.base_path`` has 37 items."""
    subtitles = pyvtt.open(
        os.path.join(self.base_path, 'capability_tester.srt'))
    self.assertEqual(len(subtitles), 37)
def setUp(self):
    """Open the ``tests/static/utf-8.vtt`` fixture as ``self.file``."""
    fixture_path = os.path.join(file_path, 'tests', 'static', 'utf-8.vtt')
    self.file = pyvtt.open(fixture_path)
def test_save_empty_slice(self):
    """Check that an empty slice has length 0 and cannot be saved.

    The windows-1252 fixture is sliced with all-zero ``starts_after`` and
    ``ends_before`` bounds; the result must be empty and saving it to
    ``self.temp_path`` must raise ``InvalidFile``.
    """
    zero_time = (0, 0, 0, 0)
    subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')
    empty_slice = subtitles.slice(starts_after=zero_time,
                                  ends_before=zero_time)

    self.assertEqual(len(empty_slice), 0)
    with self.assertRaises(InvalidFile):
        empty_slice.save(self.temp_path)
def test_save_new_eol_and_encoding(self):
    """Check that re-saving with LF and UTF-8 reproduces the UTF-8 fixture.

    The windows-1252 fixture is saved to ``self.temp_path`` with
    ``eol='\\n'`` and ``encoding='utf-8'``; the resulting bytes must equal
    those of the file at ``self.utf8_path``.  The temporary file is removed
    afterwards, even when the comparison fails.
    """
    def read_bytes(path):
        # Context manager so the handle is closed deterministically.
        with open(path, 'rb') as handle:
            return handle.read()

    vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
    try:
        vtt_file.save(self.temp_path, eol='\n', encoding='utf-8')
        self.assertEqual(read_bytes(self.temp_path),
                         read_bytes(self.utf8_path))
    finally:
        if os.path.exists(self.temp_path):
            os.remove(self.temp_path)
def test_compare_from_string_and_from_path(self):
    """Check that ``open`` and ``from_string`` parse the same items.

    The UTF-8 fixture is parsed once from its path and once from its decoded
    content; the items are paired with ``zip`` and their ``str`` forms must
    match pairwise.
    """
    # Context manager so the handle is closed instead of being left to the
    # garbage collector.
    with codecs.open(self.utf8_path, encoding='utf_8') as source:
        unicode_content = source.read()

    iterator = zip(pyvtt.open(self.utf8_path),
                   pyvtt.from_string(unicode_content))
    for file_item, string_item in iterator:
        self.assertEqual(str(file_item), str(string_item))
def test_compare_tags_with_ref(self):
    """Check that tag removal reproduces the reference text.

    Runs ``clean_text`` on the file at ``self.test_tags_path`` with only
    ``tags`` enabled and compares its ``text`` with ``self.vtt_file_ref``.
    """
    cleaned = pyvtt.open(self.test_tags_path, encoding='utf_8')

    # Key and trailing cleanup stay off so only tag removal is exercised.
    cleaned.clean_text(tags=True, keys=False, trailing=False)

    self.assertEqual(self.vtt_file_ref.text, cleaned.text)
def test_utf8(self):
    """Check default decoding of the UTF-8 fixture.

    The file at ``self.utf8_path`` must yield 1332 items and report
    ``'utf_8'`` as its encoding; opening the file at ``self.windows_path``
    without an explicit encoding must raise ``UnicodeDecodeError``.
    """
    item_count = len(pyvtt.open(self.utf8_path))
    self.assertEqual(item_count, 1332)

    detected_encoding = pyvtt.open(self.utf8_path).encoding
    self.assertEqual(detected_encoding, 'utf_8')

    with self.assertRaises(UnicodeDecodeError):
        pyvtt.open(self.windows_path)
def __test_encoding(self, encoding):
    """Check that the fixture named ``encoding`` parses to 7 indexed items.

    ``encoding`` is used as the file name inside ``self.base_path``.  The
    opened file must contain 7 items and its first item must have index 1.
    """
    fixture_path = os.path.join(self.base_path, encoding)
    subtitles = pyvtt.open(fixture_path)

    self.assertEqual(len(subtitles), 7)
    first_item = subtitles[0]
    self.assertEqual(first_item.index, 1)
def subtitles2text(input_path):
    """Return the adjusted text of every caption, joined by newlines.

    Each caption's ``text`` is passed through ``adjust_caption_text`` before
    joining.
    """
    adjusted_lines = []
    for caption in pyvtt.open(input_path):
        adjusted_lines.append(adjust_caption_text(caption.text))
    return '\n'.join(adjusted_lines)
def test_utf8(self):
    """Check default decoding of the UTF-8 fixture.

    The file at ``self.utf8_path`` must yield 1332 items and report
    ``'utf_8'`` as its encoding; opening the file at ``self.windows_path``
    without an explicit encoding must raise ``UnicodeDecodeError``.
    """
    item_count = len(pyvtt.open(self.utf8_path))
    self.assertEqual(item_count, 1332)

    detected_encoding = pyvtt.open(self.utf8_path).encoding
    self.assertEqual(detected_encoding, 'utf_8')

    with self.assertRaises(UnicodeDecodeError):
        pyvtt.open(self.windows_path)
def test_empty_file(self):
    """Check that opening an empty source yields zero items without raising.

    The platform's null device is opened with
    ``error_handling=WebVTTFile.ERROR_RAISE`` and the result must have
    length 0.
    """
    import os

    # os.devnull is '/dev/null' on POSIX and the equivalent device elsewhere,
    # so the test no longer depends on a POSIX-only path.
    empty_file = pyvtt.open(os.devnull,
                            error_handling=WebVTTFile.ERROR_RAISE)
    self.assertEqual(len(empty_file), 0)
def test_windows1252(self):
    """Check decoding of the windows-1252 fixture.

    The file at ``self.windows_path`` must yield 1332 items and report a
    CRLF ``eol``; opening the file at ``self.utf8_path`` as ASCII must raise
    ``UnicodeDecodeError``.
    """
    subtitles = pyvtt.open(self.windows_path, encoding='windows-1252')

    self.assertEqual(len(subtitles), 1332)
    self.assertEqual(subtitles.eol, '\r\n')

    with self.assertRaises(UnicodeDecodeError):
        pyvtt.open(self.utf8_path, encoding='ascii')
def setUp(self):
    """Open the ``tests/static/utf-8.vtt`` fixture as ``self.file``."""
    fixture_path = os.path.join(file_path, 'tests', 'static', 'utf-8.vtt')
    self.file = pyvtt.open(fixture_path)
def test_save(self):
    """Check that re-saving with LF and UTF-8 reproduces the UTF-8 fixture.

    The windows-1252 fixture is saved to ``self.temp_path`` with
    ``eol='\\n'`` and ``encoding='utf-8'``; the resulting bytes must equal
    those of the file at ``self.utf8_path``.  The temporary file is removed
    afterwards, even when the comparison fails.
    """
    def read_bytes(path):
        # Context manager so the handle is closed deterministically.
        with open(path, 'rb') as handle:
            return handle.read()

    vtt_file = pyvtt.open(self.windows_path, encoding='windows-1252')
    try:
        vtt_file.save(self.temp_path, eol='\n', encoding='utf-8')
        self.assertEqual(read_bytes(self.temp_path),
                         read_bytes(self.utf8_path))
    finally:
        if os.path.exists(self.temp_path):
            os.remove(self.temp_path)
def __test_encoding(self, encoding):
    """Check that the fixture named ``encoding`` parses to 7 indexed items.

    ``encoding`` is used as the file name inside ``self.base_path``.  The
    opened file must contain 7 items and its first item must have index 1.
    """
    fixture_path = os.path.join(self.base_path, encoding)
    subtitles = pyvtt.open(fixture_path)

    self.assertEqual(len(subtitles), 7)
    first_item = subtitles[0]
    self.assertEqual(first_item.index, 1)
def test_length(self):
    """Check that ``capability_tester.srt`` from ``self.base_path`` has 37 items."""
    subtitles = pyvtt.open(
        os.path.join(self.base_path, 'capability_tester.srt'))
    self.assertEqual(len(subtitles), 37)
def vtt2bcc(path, threshold=0.1, word=True):
    """Convert WebVTT subtitles into a BCC-style subtitle dictionary.

    Args:
        path: Either the path of an existing file or the VTT content itself;
            a value for which ``os.path.exists`` is false is parsed with
            ``pyvtt.from_string``.  A falsy value is treated as ``""``.
        threshold: Time span in seconds.  In word mode a segment ending
            within ``threshold`` of the current start is merged into the
            previous caption; in the fallback path a cue shorter than
            ``threshold`` starts where the previous caption ended.
        word: When true, cues are split at their inline
            ``<timestamp><c>text</c>`` markers.  When false, one caption is
            emitted per cue using the last line of its tag-free text.

    Returns:
        A dict with fixed styling keys and the caption list under ``"body"``,
        or ``{}`` when the parsed subtitles are empty.
    """
    path = path or ""
    if os.path.exists(path):
        subs = pyvtt.open(path)
    else:
        subs = pyvtt.from_string(path)

    caption_list = []
    if not word:
        caption_list = [{
            "from": sub.start.ordinal / 1000,
            "to": sub.end.ordinal / 1000,
            "location": 2,
            "content": sub.text_without_tags.split("\n")[-1],
        } for sub in subs]
    else:
        # NOTE: split the bcc captions following the vtt word-timing markers.
        # The pattern is loop-invariant, so it is compiled once.
        word_pattern = re.compile(r"<(.*?)><c>(.*?)</c>")
        for sub in subs:
            text = sub.text
            start = sub.start.ordinal / 1000
            end = sub.end.ordinal / 1000
            try:
                # Raises ValueError when the cue has no inline markers,
                # which routes it to the whole-cue fallback below.
                idx = text.index("<")
                pre_text = text[:idx]
                for t_str, match in word_pattern.findall(text):
                    pre_text += match
                    t = datetime.strptime(t_str, r"%H:%M:%S.%f")
                    # %f is parsed into microseconds, so divide by 1e6.
                    # Scaling by the digit count of the value dropped
                    # leading zeros (".050" became 0.5 instead of 0.05).
                    sec = (t.hour * 3600 + t.minute * 60 + t.second +
                           t.microsecond / 1000000)
                    final_text = pre_text.split("\n")[-1]
                    if caption_list and (
                            sec - start <= threshold
                            or caption_list[-1]["content"] == final_text):
                        # Too short or unchanged text: extend the previous
                        # caption instead of adding a new one.
                        caption_list[-1].update({
                            "to": sec,
                            "content": final_text,
                        })
                    else:
                        caption_list.append({
                            "from": start,
                            "to": sec,
                            "location": 2,
                            "content": final_text,
                        })
                    # The next segment starts where this one ended.
                    start = sec
            except ValueError:
                # No markers (or an unparsable marker timestamp): treat the
                # last line of the cue as a single caption.
                final_text = sub.text.split("\n")[-1]
                if caption_list and caption_list[-1]["content"] == final_text:
                    caption_list[-1].update({
                        "to": end,
                        "content": final_text,
                    })
                else:
                    if caption_list and end - start < threshold:
                        start = caption_list[-1]["to"]
                    caption_list.append({
                        "from": start,
                        "to": end,
                        "location": 2,
                        "content": final_text,
                    })

    # NOTE: avoid running past the end of the video by shortening the last
    # caption.  Guarded so that input producing no captions does not raise
    # IndexError before the empty result can be returned.
    if caption_list:
        last = caption_list[-1]
        last["to"] = last.get("from") + 0.1

    bcc = {
        "font_size": 0.4,
        "font_color": "#FFFFFF",
        "background_alpha": 0.5,
        "background_color": "#9C27B0",
        "Stroke": "none",
        "body": caption_list,
    }
    return bcc if subs else {}