class TestShifting(unittest.TestCase):
    """Shifting an item must move start and end together and keep duration."""

    def setUp(self):
        # Fixture: item shifted by one minute, with its end pushed a further
        # 20 seconds.
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def _assert_timing(self, start, end, duration, cps):
        # Shared assertions, checked in the same order by every test below.
        self.assertEqual(self.item.start, start)
        self.assertEqual(self.item.end, end)
        self.assertEqual(self.item.duration, duration)
        self.assertEqual(self.item.characters_per_second, cps)

    def test_shift_up(self):
        self.item.shift(1, 2, 3, 4)
        self._assert_timing((1, 3, 3, 4), (1, 3, 23, 4), (0, 0, 20, 0), 0.65)

    def test_shift_down(self):
        self.item.shift(5)
        self.item.shift(-1, -2, -3, -4)
        self._assert_timing(
            (3, 58, 56, 996), (3, 59, 16, 996), (0, 0, 20, 0), 0.65)

    def test_shift_by_ratio(self):
        self.item.shift(ratio=2)
        self._assert_timing(
            {'minutes': 2},
            {'minutes': 2, 'seconds': 40},
            (0, 0, 40, 0),
            0.325)
class TestTagRemoval(unittest.TestCase):
    """`text_without_tags` must return the text with markup tags removed."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def _assert_stripped(self, markup, expected):
        # Assign the tagged text, then compare the tag-free view of it.
        self.item.text = markup
        self.assertEqual(self.item.text_without_tags, expected)

    def test_italics_tag(self):
        self._assert_stripped("<i>Hello world !</i>", 'Hello world !')

    def test_bold_tag(self):
        self._assert_stripped("<b>Hello world !</b>", 'Hello world !')

    def test_underline_tag(self):
        self._assert_stripped("<u>Hello world !</u>", 'Hello world !')

    def test_color_tag(self):
        self._assert_stripped(
            '<font color="#ff0000">Hello world !</font>', 'Hello world !')

    def test_all_tags(self):
        markup = ('<b>Bold</b>, <i>italic</i>, <u>underlined</u>\n'
                  '<font color="#ff0000">red text</font>'
                  ', <b>one,<i> two,<u> three</u></i></b>.')
        expected = ('Bold, italic, underlined'
                    '\nred text, one, two, three.')
        self._assert_stripped(markup, expected)
class TestTagRemoval(unittest.TestCase):
    """Each supported tag, alone and nested, is removed by `text_without_tags`."""

    PLAIN = 'Hello world !'

    def setUp(self):
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)

    def test_italics_tag(self):
        self.item.text = "<i>Hello world !</i>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, self.PLAIN)

    def test_bold_tag(self):
        self.item.text = "<b>Hello world !</b>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, self.PLAIN)

    def test_underline_tag(self):
        self.item.text = "<u>Hello world !</u>"
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, self.PLAIN)

    def test_color_tag(self):
        self.item.text = '<font color="#ff0000">Hello world !</font>'
        stripped = self.item.text_without_tags
        self.assertEqual(stripped, self.PLAIN)

    def test_all_tags(self):
        # Mixed and nested tags spread over two lines of text.
        self.item.text = ''.join([
            '<b>Bold</b>, <i>italic</i>, <u>underlined</u>\n',
            '<font color="#ff0000">red text</font>',
            ', <b>one,<i> two,<u> three</u></i></b>.',
        ])
        expected = 'Bold, italic, underlined' + '\nred text, one, two, three.'
        self.assertEqual(self.item.text_without_tags, expected)
def json_to_srt(deepspeech_json, max_word_time=10, min_sub_time=1.5, max_sub_time=3):
    """Group recognised words into subtitle cues.

    Arguments:
        deepspeech_json (dict): must contain a "words" list whose entries
            have "word", "start_time" and "duration" keys. Each entry gains
            an "end_time" key as a side effect.
        max_word_time: words whose duration is not below this value are
            ignored.
        min_sub_time: a cue always ends at least this long after its start.
        max_sub_time: a word joins the current cue only when it ends no later
            than this long after the cue's start.

    Returns:
        SubRipFile: the generated cues, indexed 1, 2, 3, ... Times are
        multiplied by 1000 and truncated to integer milliseconds.
    """
    index = 0
    subtitle = ""
    start_time = 0
    end_time = 0
    subtitles = SubRipFile()

    for word in deepspeech_json["words"]:
        word["end_time"] = word["start_time"] + word["duration"]
        if word["duration"] < max_word_time:
            if start_time + max_sub_time >= word["end_time"] and subtitle:
                subtitle += " "
                subtitle += word["word"]
                end_time = max(word["end_time"], start_time + min_sub_time)
            elif subtitle:
                # The word does not fit: emit the pending cue. Python has no
                # `++` operator (`++index` is just `+(+index)`), so the
                # counter is advanced explicitly.
                index += 1
                # Convert to milliseconds
                subtitles.append(
                    SubRipItem(index=index,
                               start=int(start_time * 1000),
                               end=int(end_time * 1000),
                               text=subtitle))
                subtitle = ""

            if not subtitle:
                # Start a new cue with the current word.
                start_time = word["start_time"]
                subtitle += word["word"]
                end_time = max(word["end_time"], start_time + min_sub_time)

    if subtitle:
        # Emit whatever is still pending after the last word.
        index += 1
        subtitles.append(
            SubRipItem(index=index,
                       start=int(start_time * 1000),
                       end=int(end_time * 1000),
                       text=subtitle))
    return subtitles
class TestSerialAndParsing(unittest.TestCase):
    """Round-trip tests between SubRipItem objects and their text form.

    Uses `assertEqual` throughout; the `assertEquals` alias is deprecated.
    """

    def setUp(self):
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)
        # Serialized fixtures exercised by the tests below.
        self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
        self.bad_string = u'foobar'
        self.coordinates = (u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
                            'Y1:050 Y2:100\nHello world !\n')
        self.vtt = (u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
                    'L:12%\nHello world !\n')
        self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
        self.string_index = u'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'
        self.no_index = u'00:01:00,000 --> 00:01:20,000\nHello world !\n'

    def test_serialization(self):
        self.assertEqual(unicode(self.item), self.string)

    def test_from_string(self):
        self.assertEqual(SubRipItem.from_string(self.string), self.item)
        self.assertRaises(InvalidItem, SubRipItem.from_string,
                          self.bad_string)

    def test_coordinates(self):
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(item, self.item)
        self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')

    def test_vtt_positioning(self):
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
        self.assertEqual(vtt.index, 1)
        self.assertEqual(vtt.text, 'Hello world !')

    def test_idempotence(self):
        # Parsing then serializing must give back the input text.
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(unicode(vtt), self.vtt)
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(unicode(item), self.coordinates)

    def test_dots(self):
        self.assertEqual(SubRipItem.from_string(self.dots), self.item)

    # Bug reported in https://github.com/byroot/pysrt/issues/16
    def test_paring_error(self):
        self.assertRaises(InvalidItem, SubRipItem.from_string, u'1\n'
                          '00:01:00,000 -> 00:01:20,000 X1:000 X2:000 '
                          'Y1:050 Y2:100\nHello world !\n')

    def test_string_index(self):
        item = SubRipItem.from_string(self.string_index)
        self.assertEqual(item.index, 'foo')
        self.assertEqual(item.text, 'Hello !')

    def test_no_index(self):
        item = SubRipItem.from_string(self.no_index)
        self.assertEqual(item.index, None)
        self.assertEqual(item.text, 'Hello world !')
class TestOperators(unittest.TestCase):
    """Comparison operators on SubRipItem."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_cmp(self):
        # An item must compare equal to itself.
        same = self.item
        self.assertEqual(self.item, same)
class TestDuration(unittest.TestCase):
    """The `duration` property of a SubRipItem."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_duration(self):
        # The end was pushed 20 seconds past the start in setUp.
        expected = (0, 0, 20, 0)
        self.assertEqual(self.item.duration, expected)
class TestOperators(unittest.TestCase):
    """Comparison operators on SubRipItem."""

    def setUp(self):
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)

    def test_cmp(self):
        # An item must compare equal to itself. `assertEqual` replaces the
        # deprecated `assertEquals` alias.
        self.assertEqual(self.item, self.item)
def setUp(self):
    """Build the reference item and the serialized fixtures used by the tests."""
    reference = SubRipItem(1, text="Hello world !")
    reference.shift(minutes=1)
    reference.end.shift(seconds=20)
    self.item = reference

    # Plain serialization, and input that is not a subtitle at all.
    self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
    self.bad_string = u'foobar'
    # Same timestamps, followed by positioning information.
    self.coordinates = (
        u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
        'Y1:050 Y2:100\nHello world !\n'
    )
    self.vtt = (
        u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
        'L:12%\nHello world !\n'
    )
    # Dots instead of commas before the milliseconds.
    self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
class TestCPS(unittest.TestCase):
    """The `characters_per_second` property of a SubRipItem."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def test_characters_per_second(self):
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.65)

    def test_text_change(self):
        # The rate must follow the text once it is replaced.
        self.item.text = "Hello world !\nHello world again !"
        rate = self.item.characters_per_second
        self.assertEqual(rate, 1.6)
class TestSerialAndParsing(unittest.TestCase):
    """Round-trip tests between SubRipItem objects and their text form.

    Uses `assertEqual` throughout; the `assertEquals` alias is deprecated.
    """

    def setUp(self):
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)
        # Serialized fixtures exercised by the tests below.
        self.string = u'1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
        self.bad_string = u'foobar'
        self.coordinates = (u'1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
                            'Y1:050 Y2:100\nHello world !\n')
        self.vtt = (u'1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
                    'L:12%\nHello world !\n')
        self.dots = u'1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
        self.bad_index = u'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'

    def test_serialization(self):
        self.assertEqual(unicode(self.item), self.string)

    def test_from_string(self):
        self.assertEqual(SubRipItem.from_string(self.string), self.item)
        self.assertRaises(InvalidItem, SubRipItem.from_string,
                          self.bad_string)

    def test_coordinates(self):
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(item, self.item)
        self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')

    def test_vtt_positioning(self):
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
        self.assertEqual(vtt.index, 1)
        self.assertEqual(vtt.text, 'Hello world !')

    def test_idempotence(self):
        # Parsing then serializing must give back the input text.
        vtt = SubRipItem.from_string(self.vtt)
        self.assertEqual(unicode(vtt), self.vtt)
        item = SubRipItem.from_string(self.coordinates)
        self.assertEqual(unicode(item), self.coordinates)

    def test_dots(self):
        self.assertEqual(SubRipItem.from_string(self.dots), self.item)

    # Bug reported in https://github.com/byroot/pysrt/issues/16
    def test_paring_error(self):
        self.assertRaises(InvalidItem, SubRipItem.from_string, u'1\n'
                          '00:01:00,000 -> 00:01:20,000 X1:000 X2:000 '
                          'Y1:050 Y2:100\nHello world !\n')

    def test_invalid_index(self):
        self.assertRaises(InvalidItem, SubRipItem.from_string,
                          self.bad_index)
def setUp(self):
    """Build the reference item and the serialized fixtures used by the tests."""
    reference = SubRipItem(1, text="Hello world !")
    reference.shift(minutes=1)
    reference.end.shift(seconds=20)
    self.item = reference

    # Plain serialization, and input that is not a subtitle at all.
    self.string = '1\n00:01:00,000 --> 00:01:20,000\nHello world !\n'
    self.bad_string = 'foobar'
    # Same timestamps, followed by positioning information.
    self.coordinates = (
        '1\n00:01:00,000 --> 00:01:20,000 X1:000 X2:000 '
        'Y1:050 Y2:100\nHello world !\n'
    )
    self.vtt = (
        '1\n00:01:00,000 --> 00:01:20,000 D:vertical A:start '
        'L:12%\nHello world !\n'
    )
    # Variations on the index line and on the timestamp line.
    self.string_index = 'foo\n00:01:00,000 --> 00:01:20,000\nHello !\n'
    self.dots = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
    self.no_index = '00:01:00,000 --> 00:01:20,000\nHello world !\n'
    self.junk_after_timestamp = (
        '1\n00:01:00,000 --> 00:01:20,000?\n'
        'Hello world !\n'
    )
def processSub(sub_L1, sub_L2, levels, outs, removed_lines, show_L2):
    """Append one output cue per level for a pair of L1/L2 subtitles.

    For every entry of `levels` the text is chosen as follows:

    * the L2 text is non-empty and `isTextNotAboveLevel` accepts it: the
      level's `removed_lines` counter is incremented and the text is empty
      when `show_L2 == "no"`, otherwise the L2 template applied to the L2
      text;
    * otherwise: both texts joined when `show_L2 == "yes"`, else the L1
      template applied to the L1 text.

    Non-empty texts are appended to `outs[level]` using the index and timing
    of `sub_L2`.
    """
    text_L1 = sub_L1.text.lstrip()
    text_L2 = sub_L2.text.lstrip()
    has_L2_text = (text_L2 is not None) and (len(text_L2) > 0)

    if has_L2_text:
        cefr_level, flesh_kincade_grade, n_words = analyzeSubLevel(text_L2)
    else:
        cefr_level, flesh_kincade_grade, n_words = "", "", 0

    for level in levels:
        # The level check only runs when there is L2 text (short-circuit).
        if has_L2_text and isTextNotAboveLevel(
                level, cefr_level, flesh_kincade_grade, n_words,
                len(text_L2)):
            removed_lines[level] = removed_lines[level] + 1
            if show_L2 == "no":
                text = ""
            else:
                text = this.L2_sub_template.format(text_L2)
        elif show_L2 == "yes":
            text = joinLines(text_L2, text_L1)
        else:
            text = this.L1_sub_template.format(text_L1)

        if len(text) > 0:
            outs[level].append(
                SubRipItem(sub_L2.index, sub_L2.start, sub_L2.end, text))
def setUp(self):
    """Create an item from a fixed text and call `break_lines(5)` on it."""
    # NOTE(review): this literal appears on a single line here; any line
    # breaks it had upstream may have been lost - confirm against the
    # original test before relying on it.
    original = """77777777 333 1 55555 55555 4444 4444 22 22 22"""
    self.item = SubRipItem(1, text=original)
    self.item.break_lines(5)
def process(self, subs: SubRipFile, items: List[PgsSubtitleItem], post_process, confidence: int, max_width: int):
    """OCR a batch of items and append the accepted ones to `subs`.

    The items are combined into one image, passed to `tess.image_to_data`,
    and each item is then checked with `self.accept`. The value returned by
    `accept` is only used as an accept/reject signal: the text written to
    the cue is read from `item.text`, optionally passed through
    `post_process`.

    :param subs: destination that receives one SubRipItem per accepted item
        whose final text is non-empty.
    :param items: items to recognise.
    :param post_process: optional callable applied to the item text.
    :param confidence: forwarded to `self.accept`.
    :param max_width: forwarded to `FullImage.from_items`.
    :returns: the items for which `self.accept` returned None.
    """
    full_image = FullImage.from_items(items, self.gap, max_width)

    config = {'output_type': tess.Output.DICT, 'config': '--psm 11'}
    if self.pgs.language:
        config['lang'] = self.pgs.language.alpha3
    if self.omp_thread_limit:
        os.environ['OMP_THREAD_LIMIT'] = str(self.omp_thread_limit)

    # Debugging aid:
    # cv2.imwrite(f'{subs.path}-{len(items)}-{confidence}.png', full_image.data)
    data = TsvData(tess.image_to_data(full_image.data, **config))

    rejected = []
    for candidate in items:
        if self.accept(data, candidate, confidence) is None:
            rejected.append(candidate)
            continue

        text = candidate.text
        if post_process:
            text = post_process(text)
        if text:
            subs.append(SubRipItem(0, candidate.start, candidate.end, text))

    return rejected
def generate_srt_from_sjson(sjson_subs, speed):
    """Convert "sjson" subs recorded at `speed` into SubRip text at speed 1.0.

    :param sjson_subs: "sjson" subs with 'start', 'end' and 'text' lists.
    :param speed: speed of `sjson_subs`.
    :returns: the "srt" text; an empty string when the three lists do not
        have the same length.
    """
    lengths_match = (
        len(sjson_subs['start'])
        == len(sjson_subs['end'])
        == len(sjson_subs['text'])
    )
    if not lengths_match:
        return ''

    sjson_speed_1 = generate_subs(speed, 1, sjson_subs)

    chunks = []
    for position, start_ms in enumerate(sjson_speed_1['start']):
        cue = SubRipItem(
            index=position,
            start=SubRipTime(milliseconds=start_ms),
            end=SubRipTime(milliseconds=sjson_speed_1['end'][position]),
            text=sjson_speed_1['text'][position])
        # Each serialized cue is followed by one extra newline.
        chunks.append(unicode(cue))
        chunks.append('\n')
    return ''.join(chunks)
def dmxread_callback(frame, frame_no):
    """Record a subtitle cue when the incoming frame differs from the last one.

    The cue text is "DMX1" followed by `str(frame)`. The cue is shifted to
    `prevTime` and its end is extended by the time elapsed since `prevTime`.
    It is appended to the global `subs` and `srtFile`.

    Arguments:
        frame: the frame data; converted with `array()` for comparison.
        frame_no: unused.
    """
    global prevFrame, prevTime, subs, srtFile
    frameArray = array(frame)
    if not array_equal(prevFrame, frameArray):
        # Identity test: `frame != None` is evaluated element-wise when
        # `frame` is array-like, which cannot be used as a condition.
        if frame is not None:
            item = SubRipItem(1, text="DMX1" + str(frame))
            item.shift(seconds=prevTime)
            item.end.shift(seconds=perf_counter() - prevTime)
            if VERBOSE:
                print(item)
            subs.append(item)
            srtFile.append(item)
        # Remember when, and to what, the frame last changed.
        prevTime = perf_counter()
        prevFrame = array(frame)
def generate_srt_from_sjson(sjson_subs):
    """
    Convert sjson transcripts to SubRip (*.srt) text.

    Arguments:
        sjson_subs (dict): `sjson` subs with 'start', 'end' and 'text' lists.

    Returns:
        Subtitles in SRT format; an empty string when the three lists do not
        have the same length.
    """
    lengths_match = (
        len(sjson_subs['start'])
        == len(sjson_subs['end'])
        == len(sjson_subs['text'])
    )
    if not lengths_match:
        return ''

    chunks = []
    for position, start_ms in enumerate(sjson_subs['start']):
        cue = SubRipItem(
            index=position,
            start=SubRipTime(milliseconds=start_ms),
            end=SubRipTime(milliseconds=sjson_subs['end'][position]),
            text=sjson_subs['text'][position],
        )
        # Each serialized cue is followed by one extra newline.
        chunks.append(str(cue))
        chunks.append('\n')
    return ''.join(chunks)
def play_record_dmx(unused_addr, args, value):
    """Store one channel value and, once per interval, record/play the frame.

    `value * 255` is written into `dmx_array` at index `int(args[0])`. When
    more than `DMX_INTERVAL` has elapsed since `previous_dmx_time`, the
    trimmed frame is compared with `prev_frame`; a changed frame is recorded
    as a SubRipItem in `subs` (and in `srtFile` when it is set), and the
    frame is written to `dmx` when `PLAY_DMX` is truthy.

    NOTE(review): the nesting below was reconstructed from flattened source;
    confirm the placement of the `PLAY_DMX` block against the original.
    """
    global INTERVAL, TRANSITION_TIME, previous_time, dmxCounter, VERBOSE
    global prev_frame, prev_time, subs, srtFile, previous_dmx_time, DMX_INTERVAL, sub_incr
    global dmx
    dmx_array[int(args[0])] = int(value * 255)
    current_dmx_time = time.time()
    elapsed_dmx_time = current_dmx_time - previous_dmx_time
    if DEBUG:
        print("DMX time", current_dmx_time, previous_dmx_time, elapsed_dmx_time)
    if (elapsed_dmx_time > DMX_INTERVAL):
        # Drop trailing zeros ('b' = back) and store the frame as uint8.
        frameArray = trim_zeros(dmx_array, 'b').astype('uint8')
        # frameArray = array(frameArray)
        print(array_equal(prev_frame, frameArray), tuple(prev_frame), tuple(frameArray))
        if not array_equal(prev_frame, frameArray):
            # NOTE(review): `.any()` returns a boolean, so this comparison
            # with None looks always true - confirm the intended condition.
            if frameArray.any() != None:
                # The first element of the frame is left out of the text.
                item = SubRipItem(sub_incr, text="DMX1" + str(tuple(frameArray)[1:]).replace(" ", ""))
                item.shift(seconds=prev_time)
                item.end.shift(seconds=perf_counter() - prev_time)
                if VERBOSE:
                    print(item)
                subs.append(item)
                if srtFile != None:
                    srtFile.append(item)
                    # encoding="utf_8"
                    # srtFile.save(SRT_FILENAME, encoding=encoding)
                sub_incr += 1
                prev_time = perf_counter()
        if PLAY_DMX:
            if DEBUG:
                print("DMX tuple", tuple(frameArray)[1:])
            dmx.write_frame(tuple(frameArray)[1:])
        if not array_equal(prev_frame, frameArray):
            prev_frame = frameArray
        # Restart the interval timer.
        previous_dmx_time = time.time()
        print(previous_dmx_time)
def refragment_with_min_duration(
        subs: List[SubRipItem],
        minimum_segment_duration: float) -> List[SubRipItem]:
    """Merge consecutive cues until each merged cue reaches a minimum duration.

    Cues are accumulated while the accumulated duration is below
    `minimum_segment_duration`. The first cue seen after the threshold has
    been reached closes the current group and starts the next one.

    Arguments:
        subs {list} -- A list of SubRip cues.
        minimum_segment_duration {float} -- The minimum duration in seconds
            for each output subtitle cue.

    Returns:
        list -- The merged SubRip cues, indexed from 0. Each takes the start
        and position of its first source cue, the end of its last one, and
        the source texts each followed by a newline.
    """
    def seconds_of(cue):
        # A falsy duration from the helper is counted as 0.0 seconds.
        return MediaHelper.get_duration_in_seconds(
            str(cue.start), str(cue.end)) or 0.0

    def merge(index, cues, text):
        return SubRipItem(index, cues[0].start, cues[-1].end, text,
                          cues[0].position)

    merged = []
    pending = []
    pending_seconds = 0.0
    pending_text = ""

    for sub in subs:
        if minimum_segment_duration > pending_seconds:
            pending.append(sub)
            pending_seconds += seconds_of(sub)
            pending_text += "{}\n".format(sub.text)
            continue

        # Threshold reached: close the group. The next index always equals
        # the number of cues emitted so far.
        merged.append(merge(len(merged), pending, pending_text))
        pending = [sub]
        pending_seconds = seconds_of(sub)
        pending_text = "{}\n".format(sub.text)

    if pending:
        merged.append(merge(len(merged), pending, pending_text))
    return merged
class TestBreakingLines(unittest.TestCase):
    """Check the text of an item after `break_lines(5)` has been applied.

    NOTE(review): the input and expected literals are identical as written
    here and each sits on a single line; line breaks may have been lost from
    the triple-quoted strings - confirm against the original test.
    """

    def setUp(self):
        original = """77777777 333 1 55555 55555 4444 4444 22 22 22"""
        self.item = SubRipItem(1, text=original)
        self.item.break_lines(5)

    def test_break(self):
        # Compare the re-wrapped text with the expected literal.
        shouldbe = """77777777 333 1 55555 55555 4444 4444 22 22 22"""
        self.assertEqual(shouldbe, self.item.text)
class TestCPS(unittest.TestCase):
    """The `characters_per_second` property of a SubRipItem."""

    def setUp(self):
        fixture = SubRipItem(1, text="Hello world !")
        fixture.shift(minutes=1)
        fixture.end.shift(seconds=20)
        self.item = fixture

    def _assert_cps(self, expected):
        self.assertEqual(self.item.characters_per_second, expected)

    def test_characters_per_second(self):
        self._assert_cps(0.65)

    def test_text_change(self):
        self.item.text = "Hello world !\nHello world again !"
        self._assert_cps(1.6)

    def test_zero_duration(self):
        # Moving the start by the 20 seconds that separate it from the end.
        self.item.start.shift(seconds=20)
        self._assert_cps(0.0)

    def test_tags(self):
        self.item.text = (
            '<b>bold</b>, <i>italic</i>, <u>underlined</u>\n'
            '<font color="#ff0000">red text</font>'
            ', <b>one,<i> two,<u> three</u></i></b>'
        )
        self._assert_cps(2.45)
def generate_srt(self, text: str):
    """
    Write the given text to the .srt file, one cue per prepared sentence.

    The text is split on single spaces and handed to `prepare_text`;
    `prepare_timestamps` supplies a (start, end) pair per sentence, each a
    colon-separated string whose first four fields are read as hours,
    minutes, seconds and milliseconds.

    :param text: String with all retrieved text.
    """
    self.create_subs_path()
    subs = open_srt(self.srt_path)

    texts = self.prepare_text(text.split(" "))
    timestamps = self.prepare_timestamps(texts)

    units = ('hours', 'minutes', 'seconds', 'milliseconds')

    def fields(stamp):
        return [int(ts) for ts in stamp.split(':')]

    def as_time(parts):
        return SubRipTime(**{unit: parts[pos] for pos, unit in enumerate(units)})

    for i, (sentence, (start_timestamp, end_timestamp)) in enumerate(
            zip(texts, timestamps)):
        # Parse both stamps before any subtitle object is created.
        start_parts = fields(start_timestamp)
        end_parts = fields(end_timestamp)

        sub = SubRipItem(index=i)
        sub.text = sentence
        sub.start = as_time(start_parts)
        sub.end = as_time(end_parts)
        subs.append(sub)

    # Saving result subtitles into file
    subs.save(self.srt_path, encoding='utf-8')
    logging.info(f"Generated subtitles are saved in {self.srt_path}")
class TestCPS(unittest.TestCase):
    """Characters-per-second for plain, multi-line, tagged and empty spans."""

    def setUp(self):
        self.item = SubRipItem(1, text="Hello world !")
        self.item.shift(minutes=1)
        self.item.end.shift(seconds=20)

    def test_characters_per_second(self):
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.65)

    def test_text_change(self):
        self.item.text = "Hello world !\nHello world again !"
        rate = self.item.characters_per_second
        self.assertEqual(rate, 1.6)

    def test_zero_duration(self):
        # Moving the start by the 20 seconds that separate it from the end.
        self.item.start.shift(seconds=20)
        rate = self.item.characters_per_second
        self.assertEqual(rate, 0.0)

    def test_tags(self):
        self.item.text = ''.join([
            '<b>bold</b>, <i>italic</i>, <u>underlined</u>\n',
            '<font color="#ff0000">red text</font>',
            ', <b>one,<i> two,<u> three</u></i></b>',
        ])
        rate = self.item.characters_per_second
        self.assertEqual(rate, 2.45)
def get_captions(client_name, clip_id):
    """Fetch the captions of a clip, save them as .srt and push them to S3.

    The caption JSON is requested from `http://<client_name>/JSON.php`. If it
    does not parse, the payload is repaired (unquoted lowercase keys are
    quoted, backslashes removed) and parsed again.

    Arguments:
        client_name: host name of the client; also used in the output path.
        clip_id: identifier of the clip.

    Returns:
        tuple: `(captions, s3_url)` where `captions` is a list of
        `{'time': ..., 'text': ...}` dicts. `([], '')` is returned when the
        response status is not '200' or the payload cannot be parsed.
    """
    h = httplib2.Http()
    g_url = 'http://%s/JSON.php?clip_id=%s' % (client_name, clip_id)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Fetching URL: %s" % g_url)
    response, j = h.request(g_url)
    dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
    filename = dirname + "%s.srt" % clip_id
    subs = SubRipFile()

    if response.get('status') != '200':
        return ([], '')

    captions = []
    try:
        j = json.loads(j, strict=False)[0]
    except ValueError:
        # Quote bare lowercase keys and drop backslashes, then retry.
        ts = re.sub(r'([{,]\s+)([a-z]+)(: ")',
                    lambda s: '%s"%s"%s' % (s.groups()[0], s.groups()[1],
                                            s.groups()[2]),
                    j).replace("\\", "")
        try:
            j = json.loads(ts, strict=False)[0]
        except UnicodeDecodeError:
            ts = unicode(ts, errors='ignore')
            j = json.loads(ts, strict=False)[0]
        except Exception:
            # Still unparseable: report "no captions" rather than going on to
            # iterate over a non-iterable placeholder.
            return ([], '')

    sub_count = 0
    for item in j:
        if item["type"] == "text":
            cap = item["text"]
            offset = round(float(item["time"]), 3)
            captions.append({'time': offset, 'text': cap})
            end = get_cap_end(j, sub_count)
            if end:
                subtitle = SubRipItem(index=sub_count,
                                      start=SubRipTime(seconds=offset),
                                      end=SubRipTime(seconds=end),
                                      text=cap)
                subs.append(subtitle)
        sub_count = sub_count + 1

    try:
        subs.save(path=filename, encoding="utf-8")
    except IOError:
        # The target directory is probably missing. Create it without going
        # through a shell, so `client_name` is never interpreted as a command.
        if not os.path.isdir(dirname):
            try:
                os.makedirs(dirname)
            except OSError:
                # Deliberately ignored: the retry below raises IOError, as it
                # did when the directory could not be created before.
                pass
        subs.save(path=filename, encoding="utf-8")

    s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
    return (captions, s3_url)
def save(self, path):
    """Write the elements to `path`.

    A path ending in 'srt' is written through pysrt, one cue per element,
    running from `onset` to `onset + duration`. Any other path receives
    tab-separated text with an 'onset', 'text', 'duration' header line.
    """
    if not path.endswith('srt'):
        with open(path, 'w') as f:
            f.write('onset\ttext\tduration\n')
            f.writelines(
                '{}\t{}\t{}\n'.format(elem.onset, elem.text, elem.duration)
                for elem in self._elements)
        return

    verify_dependencies(['pysrt'])
    from pysrt import SubRipFile, SubRipItem
    from datetime import time

    out = SubRipFile()
    for elem in self._elements:
        begin = time(*self._to_tup(elem.onset))
        finish = time(*self._to_tup(elem.onset + elem.duration))
        out.append(SubRipItem(0, begin, finish, elem.text))
    out.save(path)
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge SRT subtitles in two different languages.

    The two subtitle files do not share a timeline, so the merged file gets
    a new cue every time either of them changes. As a result the two
    languages do not always change at the same moment; this cannot be
    avoided.

    See https://github.com/byroot/pysrt/issues/17
        https://github.com/byroot/pysrt/issues/15

    :param sub_a: obtained with
        sub_a = SubRipFile.open(sub_a_path, encoding=encoding)
    :param sub_b: second subtitle file.
    :param delta: an interval produces a cue only when it is longer than
        this.
    :return: a SubRipFile with the merged cues, indexes cleaned.
    """
    # Every start and end time of both files, in chronological order.
    boundaries = sorted(
        [cue.start.ordinal for cue in sub_a]
        + [cue.end.ordinal for cue in sub_a]
        + [cue.start.ordinal for cue in sub_b]
        + [cue.end.ordinal for cue in sub_b]
    )

    merged = SubRipFile()
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not ((end - start) > delta):
            continue

        text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)
        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle files into one with combined text per interval.

    All start and end times of both inputs are sorted. Each pair of
    neighbouring times longer than `delta` is looked up in both inputs with
    `find_subtitle`; when the joined text is non-empty a cue covering that
    interval is emitted.

    :param sub_a: first iterable of cues.
    :param sub_b: second iterable of cues.
    :param delta: an interval produces a cue only when it is longer than
        this.
    :return: a SubRipFile with the merged cues, indexes cleaned.
    """
    boundaries = []
    for source in (sub_a, sub_b):
        boundaries.extend(cue.start.ordinal for cue in source)
        boundaries.extend(cue.end.ordinal for cue in source)
    boundaries.sort()

    merged = SubRipFile()
    pos_a = pos_b = 0
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not ((end - start) > delta):
            continue

        text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
        text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)
        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))

    merged.clean_indexes()
    return merged
def test_string_index(self):
    # A non-numeric index line is kept as a string index.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    item = SubRipItem.from_string(self.string_index)
    self.assertEqual(item.index, 'foo')
    self.assertEqual(item.text, 'Hello !')
def test_vtt_positioning(self):
    # Text after the end timestamp is exposed through `position`.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    vtt = SubRipItem.from_string(self.vtt)
    self.assertEqual(vtt.position, 'D:vertical A:start L:12%')
    self.assertEqual(vtt.index, 1)
    self.assertEqual(vtt.text, 'Hello world !')
def test_coordinates(self):
    # The parsed item equals the reference item and keeps its coordinates.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    item = SubRipItem.from_string(self.coordinates)
    self.assertEqual(item, self.item)
    self.assertEqual(item.position, 'X1:000 X2:000 Y1:050 Y2:100')
def test_from_string(self):
    # A well-formed string parses to the reference item; junk raises.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    self.assertEqual(SubRipItem.from_string(self.string), self.item)
    self.assertRaises(InvalidItem, SubRipItem.from_string, self.bad_string)
def setUp(self):
    """Create the reference item shared by the tests."""
    # Shift the whole item by one minute, then push only its end a further
    # 20 seconds.
    reference = SubRipItem(1, text="Hello world !")
    reference.shift(minutes=1)
    reference.end.shift(seconds=20)
    self.item = reference
def setUp(self):
    """Start each test from an item built with no constructor arguments."""
    blank = SubRipItem()
    self.item = blank
def test_junk_after_timestamp(self):
    # The fixture must still parse to the reference item.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    item = SubRipItem.from_string(self.junk_after_timestamp)
    self.assertEqual(item, self.item)
def test_dots(self):
    # The `dots` fixture must parse to the reference item.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    self.assertEqual(SubRipItem.from_string(self.dots), self.item)
def test_idempotence(self):
    # Parsing then serializing must give back the input text.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    vtt = SubRipItem.from_string(self.vtt)
    self.assertEqual(unicode(vtt), self.vtt)
    item = SubRipItem.from_string(self.coordinates)
    self.assertEqual(unicode(item), self.coordinates)
def test_no_index(self):
    # Without an index line the parsed item has `index` None.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    item = SubRipItem.from_string(self.no_index)
    self.assertEqual(item.index, None)
    self.assertEqual(item.text, 'Hello world !')