def __call__(self, region):
    """
    Cut one speech region out of ``self.source_path`` with the split command.

    Args:
        region: A ``(start_ms, end_ms)`` pair in milliseconds.

    Returns:
        The path of the audio fragment that was written. In keep mode
        (``self.is_keep`` and ``self.output`` both set) ``None`` is returned
        when the command wrote anything to stderr or the resulting file is
        at most 4 bytes long. ``None`` is also returned on
        ``KeyboardInterrupt``.

    Raises:
        exceptions.AutosubException: If ``subprocess.CalledProcessError``
            is raised while splitting.
    """
    try:
        start_ms, end_ms = region
        start = float(start_ms) / 1000.0
        end = float(end_ms) / 1000.0
        # Widen the region, but never move the start before 0.
        if start > self.include_before:
            start = start - self.include_before
        end += self.include_after

        if not self.is_keep or not self.output:
            temp = tempfile.NamedTemporaryFile(suffix=self.suffix,
                                               delete=False)
            # Only the reserved path is needed; close our own handle so it
            # is not leaked for every region that gets converted.
            temp.close()
            command = self.cmd.format(start=start,
                                      dura=end - start,
                                      in_=self.source_path,
                                      out_=temp.name)
            prcs = subprocess.Popen(constants.cmd_conversion(command),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            prcs.communicate()
            return temp.name

        filename = self.output \
            + "-{start:0>8.3f}-{end:0>8.3f}{suffix}".format(
                start=start,
                end=end,
                suffix=self.suffix)
        command = self.cmd.format(start=start,
                                  dura=end - start,
                                  in_=self.source_path,
                                  out_=filename)
        prcs = subprocess.Popen(constants.cmd_conversion(command),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        err = prcs.communicate()[1]
        if err:
            # Any stderr output is treated as a failed split.
            return None

        # Five bytes are enough to decide whether the file holds more than
        # 4 bytes, so the whole fragment does not have to be loaded.
        with open(filename, mode="rb") as audio_file:
            audio_head = audio_file.read(5)
        if len(audio_head) <= 4:
            return None
        return filename

    except KeyboardInterrupt:
        return None

    except subprocess.CalledProcessError as ffmpeg_exec_error:
        # NOTE(review): Popen/communicate do not raise CalledProcessError
        # themselves; the handler is kept in case a helper does.
        raise exceptions.AutosubException(
            _("Error: ffmpeg can't split your file. "
              "Check your audio processing options.")
        ) from ffmpeg_exec_error
def __call__(self, region):
    """
    Cut one speech region out of ``self.source_path`` with the split command.

    Args:
        region: A ``(start_ms, end_ms)`` pair in milliseconds.

    Returns:
        The path of the audio fragment that was written: a fresh temporary
        file unless both ``self.is_keep`` and ``self.output`` are set, in
        which case the name is derived from ``self.output`` and the region
        bounds. ``None`` is returned on ``KeyboardInterrupt``.

    Raises:
        exceptions.AutosubException: If the split command exits with a
            non-zero status.
    """
    try:
        start_ms, end_ms = region
        start = float(start_ms) / 1000.0
        end = float(end_ms) / 1000.0
        # Widen the region, but never move the start before 0.
        if start > self.include_before:
            start = start - self.include_before
        end += self.include_after

        if not self.is_keep or not self.output:
            temp = tempfile.NamedTemporaryFile(suffix=self.suffix,
                                               delete=False)
            # Only the reserved path is needed; close our own handle so it
            # is not leaked for every region that gets converted.
            temp.close()
            command = self.cmd.format(start=start,
                                      dura=end - start,
                                      in_=self.source_path,
                                      out_=temp.name)
            # subprocess.DEVNULL replaces open(os.devnull), which leaked a
            # file object on every call.
            subprocess.check_output(
                constants.cmd_conversion(command),
                stdin=subprocess.DEVNULL)
            return temp.name

        filename = self.output \
            + "-{start:0>8.3f}-{end:0>8.3f}{suffix}".format(
                start=start,
                end=end,
                suffix=self.suffix)
        command = self.cmd.format(start=start,
                                  dura=end - start,
                                  in_=self.source_path,
                                  out_=filename)
        subprocess.check_output(
            constants.cmd_conversion(command),
            stdin=subprocess.DEVNULL)
        return filename

    except KeyboardInterrupt:
        return None

    except subprocess.CalledProcessError as ffmpeg_exec_error:
        # Chain the cause so the failing command stays in the traceback.
        raise exceptions.AutosubException(
            _("Error: ffmpeg can't split your file. "
              "Check your audio processing options.")
        ) from ffmpeg_exec_error
def ffprobe_check_file(filename):
    """
    Give an audio or video file and check whether it is not empty
    by getting its bitrate.

    The command built from ``constants.DEFAULT_CHECK_CMD`` is run and its
    standard output is searched for the text ``bit_rate``.

    Args:
        filename: Path substituted into the check command as ``in_``.

    Returns:
        ``False`` when ``bit_rate`` does not appear in the output, or when
        the character one past the separator that follows it is ``n``/``N``
        (presumably an ``N/A`` value -- confirm against ffprobe's output
        format). ``True`` otherwise.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    print(_("\nUse ffprobe to check conversion result."))
    command = constants.DEFAULT_CHECK_CMD.format(in_=filename)
    print(command)
    # subprocess.DEVNULL replaces open(os.devnull), which leaked a file
    # object on every call.
    ffprobe_bytes = subprocess.check_output(
        constants.cmd_conversion(command),
        stdin=subprocess.DEVNULL)
    ffprobe_str = ffprobe_bytes.decode(sys.stdout.encoding)
    print(ffprobe_str)
    bitrate_idx = ffprobe_str.find('bit_rate')
    # 'bit_rate' is 8 characters long; offset 9 skips one separator
    # character and lands on the first character of the value.
    if bitrate_idx < 0 or \
            ffprobe_str[bitrate_idx + 9:bitrate_idx + 10].lower() == 'n':
        return False
    return True
def ffprobe_check_file(filename):
    """
    Give an audio or video file and check whether it is not empty
    by getting its bitrate.

    Runs ``ffprobe <filename> -show_format -pretty -loglevel quiet`` and
    searches its standard output for the text ``bit_rate``.

    Args:
        filename: Path inserted verbatim into the ffprobe command line.

    Returns:
        ``False`` when ``bit_rate`` does not appear in the output, or when
        the character one past the separator that follows it is ``n``/``N``
        (presumably an ``N/A`` value -- confirm against ffprobe's output
        format). ``True`` otherwise.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero
            status.
    """
    command = "ffprobe {in_} -show_format -pretty -loglevel quiet".format(
        in_=filename)
    print(command)
    # subprocess.DEVNULL replaces open(os.devnull), which leaked a file
    # object on every call.
    ffprobe_bytes = subprocess.check_output(
        constants.cmd_conversion(command),
        stdin=subprocess.DEVNULL,
        shell=False)
    ffprobe_str = ffprobe_bytes.decode(sys.stdout.encoding)
    bitrate_idx = ffprobe_str.find('bit_rate')
    # 'bit_rate' is 8 characters long; offset 9 skips one separator
    # character and lands on the first character of the value.
    if bitrate_idx < 0 or \
            ffprobe_str[bitrate_idx + 9:bitrate_idx + 10].lower() == 'n':
        return False
    return True
def ffprobe_get_fps(  # pylint: disable=superfluous-parens
        video_file,
        input_m=input):
    """
    Return video_file's fps.

    The command built from ``constants.DEFAULT_VIDEO_FPS_CMD`` is run and
    its output (stdout, or stderr when stdout is empty) is expected to
    contain exactly two integers, which are read as numerator and
    denominator of the frame rate.

    Args:
        video_file: Path substituted into the fps command as ``in_``.
        input_m: Callable used to ask the user for the fps when it can't
            be detected. A falsy value disables the prompt.

    Returns:
        The detected or user-supplied fps as a float. ``0.0`` when
        detection fails and either no prompt is available or the answer is
        not a positive number.
    """
    try:
        command = constants.DEFAULT_VIDEO_FPS_CMD.format(in_=video_file)
        print(command)
        prcs = subprocess.Popen(constants.cmd_conversion(command),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = prcs.communicate()
        # Prefer stdout; fall back to stderr when nothing was printed there.
        ffprobe_str = (out if out else err).decode(sys.stdout.encoding)
        print(ffprobe_str)

        # ffprobe_str is already text: decoding it a second time raised an
        # AttributeError that the handler below does not catch.
        num_list = [int(num) for num in re.findall(r'\d+', ffprobe_str)]
        if len(num_list) != 2 or num_list[1] == 0:
            # A zero denominator (e.g. "0/0") is handled like any other
            # unusable answer instead of raising ZeroDivisionError.
            raise ValueError
        fps = float(num_list[0]) / float(num_list[1])

    except (subprocess.CalledProcessError, ValueError) as fps_issue:
        print(
            _("ffprobe can't get video fps.\n"
              "It is necessary when output is \".sub\"."))
        if input_m:
            input_str = input_m(
                _("Input your video fps. "
                  "Any illegal input will regard as \".srt\" instead.\n"))
            try:
                fps = float(input_str)
                if fps <= 0.0:
                    raise ValueError from fps_issue
            except ValueError:
                print(_("Use \".srt\" instead."))
                fps = 0.0
        else:
            return 0.0

    return fps
def ffprobe_check_file(filename):
    """
    Check that an audio or video file is not empty by looking up its
    bitrate in the output of the ffprobe check command.

    The command comes from ``constants.DEFAULT_CHECK_CMD``. Its stdout is
    inspected, or its stderr when stdout is empty.

    Returns:
        ``False`` when the text ``bit_rate`` is missing from the output or
        when the character one past the separator that follows it is
        ``n``/``N``. ``True`` otherwise.
    """
    print(_("\nUse ffprobe to check conversion result."))
    command = constants.DEFAULT_CHECK_CMD.format(in_=filename)
    print(command)

    process = subprocess.Popen(constants.cmd_conversion(command),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout_data, stderr_data = process.communicate()

    # Fall back to stderr only when stdout produced nothing.
    raw_report = stdout_data if stdout_data else stderr_data
    report = raw_report.decode(sys.stdout.encoding)
    print(report)

    position = report.find('bit_rate')
    if position < 0:
        return False
    # 'bit_rate' is 8 characters long; offset 9 skips one separator
    # character and lands on the first character of the value.
    value_start = report[position + 9:position + 10]
    return value_start.lower() != 'n'
def ffprobe_get_fps(  # pylint: disable=superfluous-parens
        video_file,
        input_m=input):
    """
    Return video_file's fps.

    The command built from ``constants.DEFAULT_VIDEO_FPS_CMD`` is run and
    its standard output is expected to contain exactly two integers, which
    are read as numerator and denominator of the frame rate.

    Args:
        video_file: Path substituted into the fps command as ``in_``.
        input_m: Callable used to ask the user for the fps when it can't
            be detected. A falsy value disables the prompt.

    Returns:
        The detected or user-supplied fps as a float. ``0.0`` when
        detection fails and either no prompt is available or the answer is
        not a positive number.
    """
    try:
        command = constants.DEFAULT_VIDEO_FPS_CMD.format(in_=video_file)
        print(command)
        # subprocess.DEVNULL replaces open(os.devnull), which leaked a file
        # object on every call.
        ffprobe_bytes = subprocess.check_output(
            constants.cmd_conversion(command),
            stdin=subprocess.DEVNULL)
        # Build a real list: the previous map() iterator was exhausted by
        # len(list(...)) and cannot be indexed, which raised a TypeError
        # that the handler below does not catch.
        num_list = [
            int(num) for num in
            re.findall(r'\d+', ffprobe_bytes.decode(sys.stdout.encoding))]
        if len(num_list) != 2 or num_list[1] == 0:
            # A zero denominator (e.g. "0/0") is handled like any other
            # unusable answer instead of raising ZeroDivisionError.
            raise ValueError
        fps = float(num_list[0]) / float(num_list[1])

    except (subprocess.CalledProcessError, ValueError):
        print(
            _("ffprobe(ffmpeg) can't get video fps.\n"
              "It is necessary when output is \".sub\"."))
        if input_m:
            input_str = input_m(
                _("Input your video fps. "
                  "Any illegal input will regard as \".srt\" instead.\n"))
            try:
                fps = float(input_str)
                if fps <= 0.0:
                    raise ValueError
            except ValueError:
                print(_("Use \".srt\" instead."))
                fps = 0.0
        else:
            return 0.0

    return fps
def audio_pre_prcs(  # pylint: disable=too-many-arguments, too-many-branches
        filename,
        is_keep,
        cmds,
        output_name=None,
        input_m=input):
    """
    Pre-process audio file.

    Runs the command templates in ``cmds`` one after another. Step ``i``
    reads the output of step ``i - 1`` (the first step reads ``filename``)
    and writes a new ``.flac`` file. Every step's output is verified with
    ``ffprobe_check_file``.

    Args:
        filename: Path of the source audio file. It is never deleted here.
        is_keep: When true and ``output_name`` is given, every step's
            output is kept next to ``output_name``. Otherwise temporary
            files are used and intermediate ones are removed.
        cmds: Sequence of command templates taking ``in_`` and ``out_``.
            When empty, ``constants.DEFAULT_AUDIO_PRCS`` is used, which
            requires ``constants.FFMPEG_NORMALIZE_CMD`` to be set.
        output_name: Base path for kept outputs.
        input_m: Callable used to ask for a new path when a kept output
            already exists. When falsy, existing files are overwritten.

    Returns:
        Path of the last step's output, or ``None`` when the default
        commands are unavailable or a step's output fails the check.

    Raises:
        subprocess.CalledProcessError: If a command exits with a non-zero
            status.
    """
    output_list = [filename, ]
    if not cmds:
        cmds = constants.DEFAULT_AUDIO_PRCS
        if not constants.FFMPEG_NORMALIZE_CMD:
            print(
                _("Warning: Dependency ffmpeg-normalize "
                  "not found on this machine. "
                  "Try default method."))
            return None

    if is_keep and output_name:
        for i in range(1, len(cmds) + 1):
            output_list.append(
                output_name + '_temp_{num:0>3d}.flac'.format(num=i))

            if input_m:
                # Keep asking until the chosen path does not exist yet.
                while os.path.isfile(output_list[i]):
                    print(
                        _("There is already a file with the same name"
                          " in this location: \"{dest_name}\".").format(
                              dest_name=output_list[i]))
                    output_list[i] = input_m(
                        _("Input a new path (including directory and file name) "
                          "for output file.\n"))
                    output_list[i] = os.path.splitext(output_list[i])[0]
                    output_list[i] = "{base}.{extension}".format(
                        base=output_list[i],
                        extension='temp.flac')
            else:
                if os.path.isfile(output_list[i]):
                    os.remove(output_list[i])

            command = cmds[i - 1].format(in_=output_list[i - 1],
                                         out_=output_list[i])
            print(command)
            # subprocess.DEVNULL replaces open(os.devnull), which leaked a
            # file object for every command.
            subprocess.check_output(constants.cmd_conversion(command),
                                    stdin=subprocess.DEVNULL)
            if not ffprobe_check_file(output_list[i]):
                print(_("Audio pre-processing failed. Try default method."))
                return None

    else:
        for i in range(1, len(cmds) + 1):
            temp_file = tempfile.NamedTemporaryFile(suffix='.flac',
                                                    delete=False)
            temp = temp_file.name
            temp_file.close()
            # Only the unique name is wanted; the command creates the file.
            if os.path.isfile(temp):
                os.remove(temp)
            output_list.append(temp)

            command = cmds[i - 1].format(in_=output_list[i - 1],
                                         out_=output_list[i])
            print(command)
            subprocess.check_output(constants.cmd_conversion(command),
                                    stdin=subprocess.DEVNULL)

            # Every step is checked, including the first one, which was
            # previously skipped in this branch.
            step_ok = ffprobe_check_file(output_list[i])

            if i > 1:
                # The previous intermediate file has been consumed. Index 0
                # is the caller's source file and must never be removed.
                os.remove(output_list[i - 1])

            if not step_ok:
                print(_("Audio pre-processing failed. Try default method."))
                if os.path.isfile(output_list[i]):
                    os.remove(output_list[i])
                return None

    return output_list[-1]
def audio_or_video_prcs(  # pylint: disable=too-many-branches, too-many-statements, too-many-locals, too-many-arguments
        args,
        input_m=input,
        fps=30.0,
        styles_list=None,
        no_audio_prcs=False):
    """
    Give args and process an input audio or video file.

    Steps, in order:

    1. Get speech regions, either from the subtitles file
       ``args.ext_regions`` or by running auditok on a temporary wav
       conversion of ``args.input``.
    2. Without ``args.speech_language`` (and without ``'s'`` in
       ``args.audio_process``) only a times file is written.
    3. Otherwise: optionally write the times file, convert the input for
       the API (skipped when ``no_audio_prcs``), split it into fragments,
       run speech-to-text, and write the source-language subtitles.
    4. With ``args.dst_language``: translate and write the "src",
       "bilingual" and "dst" files requested in ``args.output_files``.

    Args:
        args: Parsed options. ``args.output_files`` is consumed with
            ``remove`` (``KeyError`` expected for missing entries) and
            ``&``, so it is presumably a set.
        input_m: Passed through to ``core.str_to_file``.
        fps: Passed to ``core.list_to_sub_str``.
        styles_list: Passed to ``core.list_to_ass_str`` when ``args.styles``
            is set and the format is ``ass``, ``ssa`` or ``ass.json``.
        no_audio_prcs: When true, ``args.input`` is split directly instead
            of being converted for the API first.

    Raises:
        exceptions.AutosubException: On missing output files, failed
            conversion check, missing regions or failed translation. It is
            also raised to stop early once all requested files exist
            ("All works done.").
        exceptions.ConversionException: If splitting does not yield one
            fragment per region.
        exceptions.SpeechToTextException: If speech-to-text does not yield
            one result per region.
        subprocess.CalledProcessError: If a conversion command exits with
            a non-zero status.
    """
    if args.http_speech_api:
        gsv2_api_url = "http://" + constants.GOOGLE_SPEECH_V2_API_URL
    else:
        gsv2_api_url = "https://" + constants.GOOGLE_SPEECH_V2_API_URL

    if not args.output_files:
        raise exceptions.AutosubException(
            _("\nNo works done."
              " Check your \"-of\"/\"--output-files\" option."))

    # Evaluated once: neither args.styles nor args.format changes below.
    use_ass_styles = bool(args.styles) and \
        args.format in ('ass', 'ssa', 'ass.json')

    if args.ext_regions:
        # use external speech regions
        print(_("Use external speech regions."))
        audio_wav_temp = tempfile.NamedTemporaryFile(suffix='.wav',
                                                     delete=False)
        audio_wav = audio_wav_temp.name
        audio_wav_temp.close()
        command = args.audio_conversion_cmd.format(in_=args.input,
                                                   channel=1,
                                                   sample_rate=16000,
                                                   out_=audio_wav)
        print(command)
        # subprocess.DEVNULL replaces open(os.devnull), which leaked a
        # file object on every call.
        subprocess.check_output(constants.cmd_conversion(command),
                                stdin=subprocess.DEVNULL)
        regions = sub_utils.sub_to_speech_regions(audio_wav=audio_wav,
                                                  sub_file=args.ext_regions)
        os.remove(audio_wav)

    else:
        # use auditok_gen_speech_regions
        mode = 0
        if args.strict_min_length:
            mode = auditok.StreamTokenizer.STRICT_MIN_LENGTH
            if args.drop_trailing_silence:
                mode = mode | auditok.StreamTokenizer.DROP_TRAILING_SILENCE
        elif args.drop_trailing_silence:
            mode = auditok.StreamTokenizer.DROP_TRAILING_SILENCE

        audio_wav_temp = tempfile.NamedTemporaryFile(suffix='.wav',
                                                     delete=False)
        audio_wav = audio_wav_temp.name
        audio_wav_temp.close()
        command = args.audio_conversion_cmd.format(in_=args.input,
                                                   channel=1,
                                                   sample_rate=48000,
                                                   out_=audio_wav)
        print(
            _("\nConvert source audio to \"{name}\" "
              "and get audio length for regions detection.").format(
                  name=audio_wav))
        print(command)
        subprocess.check_output(constants.cmd_conversion(command),
                                stdin=subprocess.DEVNULL)

        if not ffmpeg_utils.ffprobe_check_file(audio_wav):
            raise exceptions.AutosubException(
                _("Error: Convert source audio to \"{name}\" failed.").format(
                    name=audio_wav))

        regions = core.auditok_gen_speech_regions(
            audio_wav=audio_wav,
            energy_threshold=args.energy_threshold,
            min_region_size=constants.MIN_REGION_SIZE,
            max_region_size=constants.MAX_REGION_SIZE,
            max_continuous_silence=constants.DEFAULT_CONTINUOUS_SILENCE,
            mode=mode)
        os.remove(audio_wav)
        print(_("\n\"{name}\" has been deleted.").format(name=audio_wav))

    if not regions:
        raise exceptions.AutosubException(
            _("Error: Can't get speech regions."))

    if args.speech_language or \
            args.audio_process and 's' in args.audio_process:
        # process output first
        try:
            args.output_files.remove("regions")
            if use_ass_styles:
                times_string = core.list_to_ass_str(
                    text_list=regions,
                    styles_list=styles_list,
                    subtitles_file_format=args.format)
            else:
                times_string = core.list_to_sub_str(
                    timed_text=regions,
                    fps=fps,
                    subtitles_file_format=args.format)
            # times to subtitles string
            times_name = "{base}.{nt}.{extension}".format(
                base=args.output,
                nt="times",
                extension=args.format)
            subtitles_file_path = core.str_to_file(str_=times_string,
                                                   output=times_name,
                                                   input_m=input_m)
            # subtitles string to file

            print(
                _("Times file created at \"{}\".").format(subtitles_file_path))

            if not args.output_files:
                raise exceptions.AutosubException(_("\nAll works done."))

        except KeyError:
            # "regions" was not requested.
            pass

        if not no_audio_prcs:
            audio_for_api_temp = tempfile.NamedTemporaryFile(
                suffix=args.api_suffix,
                delete=False)
            audio_for_api = audio_for_api_temp.name
            audio_for_api_temp.close()
            command = args.audio_conversion_cmd.format(
                in_=args.input,
                channel=args.api_audio_channel,
                sample_rate=args.api_sample_rate,
                out_=audio_for_api)
            print(
                _("\nConvert to \"{name}\" "
                  "for API.").format(name=audio_for_api))
            print(command)
            subprocess.check_output(constants.cmd_conversion(command),
                                    stdin=subprocess.DEVNULL)
            if not ffmpeg_utils.ffprobe_check_file(audio_for_api):
                raise exceptions.AutosubException(
                    _("Error: Convert source audio to \"{name}\" failed.").
                    format(name=audio_for_api))

        else:
            audio_for_api = args.input

        audio_fragments = core.bulk_audio_conversion(
            source_file=audio_for_api,
            output=args.output,
            regions=regions,
            split_cmd=args.audio_split_cmd,
            suffix=args.api_suffix,
            concurrency=args.audio_concurrency,
            is_keep=args.keep)

        if not audio_fragments or \
                len(audio_fragments) != len(regions):
            if not args.keep:
                # `or ()` keeps a None result from raising TypeError here
                # and hiding the ConversionException below.
                for audio_fragment in audio_fragments or ():
                    os.remove(audio_fragment)
            raise exceptions.ConversionException(
                _("Error: Conversion failed."))

        if not args.keep:
            # NOTE(review): with no_audio_prcs set, audio_for_api is
            # args.input itself, so this removes the file the caller
            # passed in. Confirm callers only pass a disposable file in
            # that case.
            os.remove(audio_for_api)
            print(
                _("\n\"{name}\" has been deleted.").format(name=audio_for_api))

        if args.audio_process and 's' in args.audio_process:
            raise exceptions.AutosubException(
                _("Audio processing complete.\nAll works done."))

        # speech to text
        text_list = core.audio_to_text(audio_fragments=audio_fragments,
                                       api_url=gsv2_api_url,
                                       regions=regions,
                                       api_key=args.gspeechv2,
                                       concurrency=args.speech_concurrency,
                                       src_language=args.speech_language,
                                       min_confidence=args.min_confidence,
                                       audio_rate=args.api_sample_rate,
                                       is_keep=args.keep)

        if not text_list or len(text_list) != len(regions):
            raise exceptions.SpeechToTextException(
                _("Error: Speech-to-text failed.\nAll works done."))

        timed_text = get_timed_text(is_empty_dropped=args.drop_empty_regions,
                                    regions=regions,
                                    text_list=text_list)

        if args.dst_language:
            # process output first
            try:
                args.output_files.remove("src")
                if use_ass_styles:
                    src_string = core.list_to_ass_str(
                        text_list=timed_text,
                        styles_list=styles_list[:2],
                        subtitles_file_format=args.format,
                    )
                else:
                    src_string = core.list_to_sub_str(
                        timed_text=timed_text,
                        fps=fps,
                        subtitles_file_format=args.format)

                # formatting timed_text to subtitles string
                src_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.speech_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=src_string,
                                                       output=src_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Speech language subtitles "
                      "file created at \"{}\".").format(subtitles_file_path))

                if not args.output_files:
                    raise exceptions.AutosubException(_("\nAll works done."))

            except KeyError:
                # "src" was not requested.
                pass

            # text translation
            if args.gtransv2:
                # use gtransv2
                translated_text = core.list_to_gtv2(
                    text_list=text_list,
                    api_key=args.gtransv2,
                    concurrency=args.trans_concurrency,
                    src_language=args.src_language,
                    dst_language=args.dst_language,
                    lines_per_trans=args.lines_per_trans)
            else:
                # use googletrans
                translated_text = core.list_to_googletrans(
                    text_list,
                    src_language=args.src_language,
                    dst_language=args.dst_language,
                    sleep_seconds=args.sleep_seconds,
                    user_agent=args.user_agent,
                    service_urls=args.service_urls)

            if not translated_text or len(translated_text) != len(regions):
                raise exceptions.AutosubException(
                    _("Error: Translation failed."))

            timed_trans = get_timed_text(
                is_empty_dropped=args.drop_empty_regions,
                regions=regions,
                text_list=translated_text)

            try:
                args.output_files.remove("bilingual")
                if use_ass_styles:
                    bilingual_string = core.list_to_ass_str(
                        text_list=[timed_text, timed_trans],
                        styles_list=styles_list,
                        subtitles_file_format=args.format,
                    )
                else:
                    bilingual_string = core.list_to_sub_str(
                        timed_text=timed_text + timed_trans,
                        fps=fps,
                        subtitles_file_format=args.format)
                # formatting timed_text to subtitles string
                bilingual_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.src_language + '&' + args.dst_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=bilingual_string,
                                                       output=bilingual_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Bilingual subtitles file "
                      "created at \"{}\".").format(subtitles_file_path))

                if not args.output_files:
                    raise exceptions.AutosubException(_("\nAll works done."))

            except KeyError:
                # "bilingual" was not requested.
                pass

            try:
                args.output_files.remove("dst")
                # formatting timed_text to subtitles string
                if use_ass_styles:
                    if len(args.styles) == 4:
                        # Four styles given: the last two are used for the
                        # destination-language file.
                        dst_string = core.list_to_ass_str(
                            text_list=timed_trans,
                            styles_list=styles_list[2:4],
                            subtitles_file_format=args.format,
                        )
                    else:
                        dst_string = core.list_to_ass_str(
                            text_list=timed_trans,
                            styles_list=styles_list,
                            subtitles_file_format=args.format,
                        )
                else:
                    dst_string = core.list_to_sub_str(
                        timed_text=timed_trans,
                        fps=fps,
                        subtitles_file_format=args.format)
                dst_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.dst_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=dst_string,
                                                       output=dst_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Destination language subtitles "
                      "file created at \"{}\".").format(subtitles_file_path))

            except KeyError:
                # "dst" was not requested.
                pass

        else:
            if len(args.output_files) > 1 or \
                    not ({"dst", "src"} & args.output_files):
                print(
                    _("Override \"-of\"/\"--output-files\" due to your args too few."
                      "\nOutput source subtitles file only."))
            timed_text = get_timed_text(
                is_empty_dropped=args.drop_empty_regions,
                regions=regions,
                text_list=text_list)
            if use_ass_styles:
                src_string = core.list_to_ass_str(
                    text_list=timed_text,
                    styles_list=styles_list,
                    subtitles_file_format=args.format,
                )
            else:
                src_string = core.list_to_sub_str(
                    timed_text=timed_text,
                    fps=fps,
                    subtitles_file_format=args.format)
            # formatting timed_text to subtitles string
            src_name = "{base}.{nt}.{extension}".format(
                base=args.output,
                nt=args.speech_language,
                extension=args.format)
            subtitles_file_path = core.str_to_file(str_=src_string,
                                                   output=src_name,
                                                   input_m=input_m)
            # subtitles string to file
            print(
                _("Speech language subtitles "
                  "file created at \"{}\".").format(subtitles_file_path))

    else:
        print(
            _("Override \"-of\"/\"--output-files\" due to your args too few."
              "\nOutput regions subtitles file only."))
        if use_ass_styles:
            times_subtitles = core.list_to_ass_str(
                text_list=regions,
                styles_list=styles_list,
                subtitles_file_format=args.format)
        else:
            times_subtitles = core.list_to_sub_str(
                timed_text=regions,
                fps=fps,
                subtitles_file_format=args.format)
        # times to subtitles string
        times_name = "{base}.{nt}.{extension}".format(base=args.output,
                                                      nt="times",
                                                      extension=args.format)
        subtitles_file_path = core.str_to_file(str_=times_subtitles,
                                               output=times_name,
                                               input_m=input_m)
        # subtitles string to file

        print(_("Times file created at \"{}\".").format(subtitles_file_path))