def compare_scripts(ideal_path, test_path, timecodes, test_name, expected_errors):
    """Compare a produced script against a reference one, frame by frame.

    Both files are loaded with ``AssScript.from_file`` and sorted by time.
    Events are then paired positionally, and each pair's start/end times are
    converted to frame numbers through ``timecodes.get_frame_number``. A pair
    counts as failed when its start frame, end frame, or both differ.

    :param ideal_path: path of the reference script.
    :param test_path: path of the script under test.
    :param timecodes: object providing ``get_frame_number(time)``.
    :param test_name: name included in the length-mismatch log message.
    :param expected_errors: exact number of failed lines that is acceptable.
    :return: ``True`` only when the number of failed lines equals
        ``expected_errors``; ``False`` when the scripts have a different
        number of events or the failure count differs in either direction.
    """
    reference = AssScript.from_file(ideal_path)
    candidate = AssScript.from_file(test_path)

    # Positional pairing is meaningless when the event counts differ.
    if len(candidate.events) != len(reference.events):
        message = "Script length didn't match: {0} in ideal vs {1} in test. Test {2}".format(
            len(reference.events), len(candidate.events), test_name)
        logging.critical(message)
        return False

    reference.sort_by_time()
    candidate.sort_by_time()

    failed = 0
    for position, (expected, actual) in enumerate(zip(reference.events, candidate.events)):
        expected_first = timecodes.get_frame_number(expected.start)
        expected_last = timecodes.get_frame_number(expected.end)
        actual_first = timecodes.get_frame_number(actual.start)
        actual_last = timecodes.get_frame_number(actual.end)

        start_differs = expected_first != actual_first
        end_differs = expected_last != actual_last
        if not start_differs and not end_differs:
            continue

        if start_differs and end_differs:
            logging.debug(u'{0}: start and end time failed at "{1}". {2}-{3} vs {4}-{5}'.format(
                position, strip_tags(expected.text),
                format_time(expected.start), format_time(expected.end),
                format_time(actual.start), format_time(actual.end)))
        elif end_differs:
            logging.debug(u'{0}: end time failed at "{1}". {2} vs {3}'.format(
                position, strip_tags(expected.text),
                format_time(expected.end), format_time(actual.end)))
        else:
            logging.debug(u'{0}: start time failed at "{1}". {2} vs {3}'.format(
                position, strip_tags(expected.text),
                format_time(expected.start), format_time(actual.start)))
        failed += 1

    total = len(reference.events)
    logging.info('Total lines: {0}, good: {1}, failed: {2}'.format(total, total - failed, failed))

    if failed > expected_errors:
        logging.critical(
            'Got more failed lines than expected ({0} actual vs {1} expected)'.format(failed, expected_errors))
        return False
    if failed < expected_errors:
        logging.critical(
            'Got less failed lines than expected ({0} actual vs {1} expected)'.format(failed, expected_errors))
        return False

    logging.critical('Met expectations')
    return True
def test_read_from_file(self):
    # Writes a small ASS script through the file descriptor stored on the
    # test case, parses it back with AssScript.from_file and checks the
    # script info, styles and events that the parser exposes.
    text = """[Script Info]
; Script generated by Aegisub 3.1.1
Title: script title
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1
Style: Signs,Gentium Basic,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.42,0:00:03.36,Default,,0000,0000,0000,,As you already know,
Dialogue: 0,0:00:03.36,0:00:05.93,Default,,0000,0000,0000,,I'm concerned about the hair on my nipples."""

    # NOTE(review): self.script_description / self.script_path are presumably
    # a descriptor/path pair created by the fixture setup - confirm there.
    os.write(self.script_description, text)
    script = AssScript.from_file(self.script_path)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(
        ["; Script generated by Aegisub 3.1.1", "Title: script title"],
        script.script_info)
    self.assertEqual(
        [
            "Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1",
            "Style: Signs,Gentium Basic,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1"
        ],
        script.styles)
    self.assertEqual([1, 2], [x.source_index for x in script.events])
    self.assertEqual(
        u"Dialogue: 0,0:00:01.42,0:00:03.36,Default,,0000,0000,0000,,As you already know,",
        unicode(script.events[0]))
    self.assertEqual(
        u"Dialogue: 0,0:00:03.36,0:00:05.93,Default,,0000,0000,0000,,I'm concerned about the hair on my nipples.",
        unicode(script.events[1]))
def test_read_from_file(self):
    # Writes a small ASS script through the file descriptor stored on the
    # test case, parses it back with AssScript.from_file and checks the
    # script info, styles and events that the parser exposes.
    text = """[Script Info]
; Script generated by Aegisub 3.1.1
Title: script title
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1
Style: Signs,Gentium Basic,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.42,0:00:03.36,Default,,0000,0000,0000,,As you already know,
Dialogue: 0,0:00:03.36,0:00:05.93,Default,,0000,0000,0000,,I'm concerned about the hair on my nipples."""

    # NOTE(review): self.script_description / self.script_path are presumably
    # a descriptor/path pair created by the fixture setup - confirm there.
    os.write(self.script_description, text)
    script = AssScript.from_file(self.script_path)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(
        ["; Script generated by Aegisub 3.1.1", "Title: script title"],
        script.script_info)
    self.assertEqual(
        [
            "Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1",
            "Style: Signs,Gentium Basic,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1"
        ],
        script.styles)
    self.assertEqual([0, 1], [x.source_index for x in script.events])
    self.assertEqual(
        u"Dialogue: 0,0:00:01.42,0:00:03.36,Default,,0000,0000,0000,,As you already know,",
        unicode(script.events[0]))
    self.assertEqual(
        u"Dialogue: 0,0:00:03.36,0:00:05.93,Default,,0000,0000,0000,,I'm concerned about the hair on my nipples.",
        unicode(script.events[1]))
def run(args):
    """Shift the source script so that it matches the destination audio.

    The function works in two phases. First it validates the arguments and
    decides which audio, subtitle, chapter, keyframe and timecode inputs to
    use, registering any streams that have to be extracted on the two
    ``Demuxer`` objects. Then it demuxes, computes a shift for every script
    event from the two audio streams, optionally groups and smooths the
    events, optionally snaps them to keyframes, applies the shifts and saves
    the resulting script. When plotting is enabled and ``args.plot_path`` is
    set, the intermediate shifts are plotted and saved as well.

    :param args: object exposing the options read below as attributes
        (``source``, ``destination``, ``script_file``, ``temp_dir``, ...).
    :raises SushiError: when fps and timecodes are both given for one side,
        when the source is a WAV and no script file is given, when keyframes
        are given for only one side, when the script type is not ``.ass`` or
        ``.srt``, when source and output script extensions differ, or when
        keyframes/timecodes are required but cannot be obtained.
    """
    # The literal value 'none' (any case) disables chapters entirely.
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    # Fixed: this check used to label the destination file as 'Source timecodes'.
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    # 'auto' and 'make' are keywords handled by select_keyframes, not file paths.
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError('Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    # A WAV source cannot provide a script, so one must be passed explicitly.
    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes):
        raise SushiError('Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path,
                              sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path,
                              sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError("Source and destination script file types don't match ({0} vs {1})"
                             .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            # Returns the keyframes file to read; for 'auto'/'make' this is a
            # file in temp_dir. 'make' always requests extraction, 'auto'
            # only when that file does not exist yet.
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError("Cannot make keyframes for {0} because it doesn't have any video!"
                                         .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            # Returns the timecodes file to read, or None when an fps value
            # is given (Timecodes.cfr is used with that fps later on).
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError('Fps, timecodes or video files must be provided if keyframes are used')

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [src_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(src_keyframes_file)]

            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [dst_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(dst_keyframes_file)]

        if script_extension == '.ass':
            script = AssScript.from_file(src_script_path)
        else:
            script = SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)

        search_groups = prepare_search_groups(script.events,
                                              source_duration=src_stream.duration_seconds,
                                              chapter_times=chapter_times,
                                              max_ts_duration=args.max_ts_duration,
                                              max_ts_distance=args.max_ts_distance)

        # The rewind threshold is passed as 0 when grouping is turned off.
        calculate_shifts(src_stream, dst_stream, search_groups,
                         normal_window=args.window,
                         max_window=args.max_window,
                         rewind_thresh=args.rewind_thresh if args.grouping else 0)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(u'Group (start: {0}, end: {1}, lines: {2}), '
                             u'shifts (start: {3}, end: {4}, average: {5})'
                             .format(format_time(g[0].start), format_time(g[-1].end), len(g),
                                     start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                # Linked events are resolved before the keyframe snapping runs.
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(g, chapter_times, args.max_ts_duration, args.max_ts_distance,
                                             src_keytimes, dst_keytimes, src_timecodes, dst_timecodes,
                                             args.max_kf_distance, args.kf_mode)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance,
                                         src_keytimes, dst_keytimes, src_timecodes, dst_timecodes,
                                         args.max_kf_distance, args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([x.shift + (x._start_shift + x._end_shift)/2.0 for x in events], label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        # Runs even when processing fails, but only if cleanup was requested.
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
def run(args):
    """Shift the source script so that it matches the destination audio.

    The function works in two phases. First it validates the arguments and
    decides which audio, subtitle, chapter, keyframe and timecode inputs to
    use, registering any streams that have to be extracted on the two
    ``Demuxer`` objects. Then it demuxes, computes a shift for every script
    event from the two audio streams, optionally groups and smooths the
    events, optionally snaps them to keyframes, applies the shifts and saves
    the resulting script. With grouping enabled and ``args.write_avs`` set,
    ``write_shift_avs`` is also called for the detected groups. When plotting
    is enabled and ``args.plot_path`` is set, the intermediate shifts are
    plotted and saved as well.

    :param args: object exposing the options read below as attributes
        (``source``, ``destination``, ``script_file``, ``temp_dir``, ...).
    :raises SushiError: when fps and timecodes are both given for one side,
        when the source is a WAV and no script file is given, when keyframes
        are given for only one side, when the script type is not ``.ass`` or
        ``.srt``, when source and output script extensions differ, or when
        keyframes/timecodes are required but cannot be obtained.
    """
    # The literal value 'none' (any case) disables chapters entirely.
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path
    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    # Fixed: this check used to label the destination file as 'Source timecodes'.
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')
    # 'auto' and 'make' are keywords handled by select_keyframes, not file paths.
    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError(
            'Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    # A WAV source cannot provide a script, so one must be passed explicitly.
    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes):
        raise SushiError(
            'Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx,
                              output_path=src_audio_path,
                              sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx,
                              output_path=dst_audio_path,
                              sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx,
                               output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selecting destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError(
                "Source and destination script file types don't match ({0} vs {1})"
                .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            # Returns the keyframes file to read; for 'auto'/'make' this is a
            # file in temp_dir. 'make' always requests extraction, 'auto'
            # only when that file does not exist yet.
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError(
                            "Cannot make keyframes for {0} because it doesn't have any video!"
                            .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            # Returns the timecodes file to read, or None when an fps value
            # is given (Timecodes.cfr is used with that fps later on).
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError(
                    'Fps, timecodes or video files must be provided if keyframes are used'
                )

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [
                src_timecodes.get_frame_time(f)
                for f in parse_keyframes(src_keyframes_file)
            ]

            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [
                dst_timecodes.get_frame_time(f)
                for f in parse_keyframes(dst_keyframes_file)
            ]

        if script_extension == '.ass':
            script = AssScript.from_file(src_script_path)
        else:
            script = SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path,
                               sample_rate=args.sample_rate,
                               sample_type=args.sample_type)

        # The rewind threshold is passed as 0 when grouping is turned off.
        calculate_shifts(
            src_stream,
            dst_stream,
            script.events,
            chapter_times=chapter_times,
            window=args.window,
            max_window=args.max_window,
            rewind_thresh=args.rewind_thresh if args.grouping else 0,
            max_ts_duration=args.max_ts_duration,
            max_ts_distance=args.max_ts_distance)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups, args.min_group_size)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events, args.min_group_size)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(
                    u'Group (start: {0}, end: {1}, lines: {2}), '
                    u'shifts (start: {3}, end: {4}, average: {5})'.format(
                        format_time(g[0].start), format_time(g[-1].end),
                        len(g), start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                # Linked events are resolved before the keyframe snapping runs.
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(
                        g, chapter_times, args.max_ts_duration,
                        args.max_ts_distance, src_keytimes, dst_keytimes,
                        src_timecodes, dst_timecodes, args.max_kf_distance,
                        args.kf_mode)

            # The .avs file is written next to the output script.
            if args.write_avs:
                write_shift_avs(dst_script_path + '.avs', groups,
                                src_audio_path, dst_audio_path)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times,
                                         args.max_ts_duration,
                                         args.max_ts_distance, src_keytimes,
                                         dst_keytimes, src_timecodes,
                                         dst_timecodes, args.max_kf_distance,
                                         args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([
                x.shift + (x._start_shift + x._end_shift) / 2.0
                for x in events
            ], label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)

    finally:
        # Runs even when processing fails, but only if cleanup was requested.
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()