def main():
    # This variant hardcodes its input file; the language and format below
    # stand in for the CLI options the other variants parse ("en" and "srt"
    # are the usual defaults and are assumed here).
    filename = "/home/tc/DATA/subtest/test1.wav"
    src_language = "en"
    subtitle_format = "srt"
    output = None

    audio_filename, audio_rate = extract_audio(filename)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(10)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print "Cancelling transcription"
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base, ext = os.path.splitext(filename)
        dest = "{base}.{format}".format(base=base, format=subtitle_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print "Subtitles file created at {}".format(dest)
    os.remove(audio_filename)
    return 0
def emit_subtitles(dest, source_path, lang, format, regions, transcripts):
    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(format)
    formatted_subtitles = formatter(timed_subtitles)
    if not dest:
        base, ext = os.path.splitext(source_path)
        dest = "{base}.{format}".format(base=base, format=format)
    base, ext = os.path.splitext(dest)
    dest = "{base}.subs.{lang}{ext}".format(base=base, lang=lang, ext=ext)
    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    return dest
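# A minimal usage sketch for emit_subtitles (not part of the original code),
# assuming FORMATTERS contains an "srt" formatter and that each region is a
# (start, end) tuple in seconds paired with a transcript string. The sample
# values and the helper name are hypothetical.
def example_emit_subtitles():
    sample_regions = [(0.0, 2.5), (3.1, 6.0)]
    sample_transcripts = ["hello there", "thanks for watching"]
    # With dest=None the path is derived from source_path, then a
    # ".subs.<lang>" suffix is spliced in before the extension,
    # yielding "example.subs.en.srt" here.
    return emit_subtitles(dest=None, source_path="example.mp4", lang="en",
                          format="srt", regions=sample_regions,
                          transcripts=sample_transcripts)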
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved "
                             "in the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="en")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="en")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    # Normalise path separators on Windows.
    if os.name != "posix":
        args.source_path = args.source_path.replace("/", "\\")

    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = ProcessingPool(args.concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if not is_same_language(args.src_language, args.dst_language):
                if args.api_key:
                    google_translate_api_key = args.api_key
                    translator = Translator(args.dst_language,
                                            google_translate_api_key,
                                            dst=args.dst_language,
                                            src=args.src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        args.src_language, args.dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = args.output
    if not dest:
        base, ext = os.path.splitext(args.source_path)
        dest = "{base}.{format}".format(base=base, format=args.format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print("Subtitles file created at {}".format(dest))
    os.remove(audio_filename)
    return 0
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        src_language,
        listener_progress,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    # Windows does not support forkserver, only spawn. On macOS the start
    # method also matters: from Python 3.8 the default is spawn rather than
    # fork, and the method can be set only once per process (otherwise it
    # raises an error).
    if os.name != "nt" and "Darwin" in os.uname():
        if multiprocessing.get_start_method(allow_none=True) != 'spawn':
            multiprocessing.set_start_method('spawn')

    Ctr_Autosub.cancel = False
    Ctr_Autosub.step = 0

    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            if Ctr_Autosub.cancel:
                return -1
            str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
            len_regions = len(regions)
            extracted_regions = []
            Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
            for i, extracted_region in enumerate(
                    Ctr_Autosub.pool.imap(converter, regions)):
                Ctr_Autosub.step = 1
                extracted_regions.append(extracted_region)
                progress_percent = MyUtil.percentage(i, len_regions)
                Ctr_Autosub.output_progress(listener_progress, str_task_1,
                                            progress_percent)
            if Ctr_Autosub.cancel:
                return -1
            else:
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()

            str_task_2 = "Step 2 of 2: Performing speech recognition "
            Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
            for i, transcript in enumerate(
                    Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                Ctr_Autosub.step = 2
                transcripts.append(transcript)
                progress_percent = MyUtil.percentage(i, len_regions)
                Ctr_Autosub.output_progress(listener_progress, str_task_2,
                                            progress_percent)
            if Ctr_Autosub.cancel:
                return -1
            else:
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()
        except KeyboardInterrupt:
            Ctr_Autosub.pbar.finish()
            Ctr_Autosub.pool.terminate()
            Ctr_Autosub.pool.join()
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    if Ctr_Autosub.cancel:
        return -1
    else:
        Ctr_Autosub.pool.close()
        Ctr_Autosub.pool.join()
        return dest
def generate_subtitles(
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language_list=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    dest_list = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            transcripts = []
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            for dst_language in dst_language_list:
                translated_transcripts = []
                if not is_same_language(src_language, dst_language):
                    if api_key:
                        google_translate_api_key = api_key
                        translator = Translator(dst_language, google_translate_api_key,
                                                dst=dst_language, src=src_language)
                        prompt = "Translating from {0} to {1}: ".format(
                            src_language, dst_language)
                        widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                        pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                        for i, transcript in enumerate(pool.imap(translator, transcripts)):
                            translated_transcripts.append(transcript)
                            pbar.update(i)
                        pbar.finish()
                    else:
                        print("Error: Subtitle translation requires specified "
                              "Google Translate API key. "
                              "See --help for further information.")
                        return 1

                if len(translated_transcripts) > 0:
                    timed_subtitles = [(r, t) for r, t in
                                       zip(regions, translated_transcripts) if t]
                else:
                    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

                formatter = FORMATTERS.get(subtitle_file_format)
                formatted_subtitles = formatter(timed_subtitles)

                dest = output
                if not dest:
                    base, ext = os.path.splitext(source_path)
                    dest = "{base}_{lang}.{format}".format(
                        base=base, lang=dst_language, format=subtitle_file_format)
                dest_list.append(dest)

                with open(dest, 'wb') as f:
                    f.write(formatted_subtitles.encode("utf-8"))
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    os.remove(audio_filename)
    return dest_list
def generate_subtitles(source_path,
                       output=None,
                       concurrency=DEFAULT_CONCURRENCY,
                       src_language=DEFAULT_SRC_LANGUAGE,
                       dst_language=DEFAULT_DST_LANGUAGE,
                       subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
                       api_key=None,
                       min_sample_length=DEFAULT_MIN_LENGTH,
                       max_sample_length=DEFAULT_MAX_LENGTH,
                       silent_percentile=DEFAULT_PERCENTILE,
                       silent_frame_cut=DEFAULT_FRAME_CUT,
                       interval=DEFAULT_INTERVAL):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename,
                                  silent_percentile=silent_percentile,
                                  min_region_size=min_sample_length,
                                  max_region_size=max_sample_length,
                                  silent_frame_cut=silent_frame_cut,
                                  percentile_interval=interval)
    print("Found %i regions with potential speech." % len(regions))
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)
    print("Found %i segments with voice." % len(timed_subtitles))

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
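# A minimal usage sketch for the tunable variant above (not part of the
# original code). The file name, helper name, and concrete values are made
# up, and the inline notes are guesses from the parameter names rather than
# documented behaviour.
def example_generate_with_tuning():
    return generate_subtitles("lecture.mp4",
                              src_language="en",
                              dst_language="en",
                              subtitle_file_format="srt",
                              min_sample_length=0.5,   # presumably the shortest region to keep
                              max_sample_length=6.0,   # presumably the longest region to keep
                              silent_percentile=0.2,   # presumably the energy cut-off for silence
                              silent_frame_cut=0.25,   # presumably how much silence to trim
                              interval=2.0)            # presumably the percentile window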
def generate_subtitles(
        audio_filename,
        audio_rate,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)

            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)

            if not is_same_language(src_language, dst_language):
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base, ext = os.path.splitext(audio_filename)
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    return dest
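# A minimal usage sketch for the variant above, which takes pre-extracted
# audio instead of the original media file (not part of the original code;
# "movie.mp4", the helper name, the language pair, and the key placeholder
# are made up).
def example_generate_from_extracted_audio():
    audio_filename, audio_rate = extract_audio("movie.mp4")
    try:
        return generate_subtitles(audio_filename, audio_rate,
                                  src_language="en",
                                  dst_language="es",
                                  subtitle_file_format="srt",
                                  api_key="YOUR_GOOGLE_TRANSLATE_API_KEY")
    finally:
        # This variant does not delete the temporary WAV itself,
        # so the caller cleans it up.
        os.remove(audio_filename)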
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    translator = TranslatorWithApikey(dst_language, api_key,
                                                      dst=dst_language,
                                                      src=src_language)
                    print("Using specific translation API...")
                else:
                    translator = TranslatorWithoutApikey(src_language, dst_language)
                    print("Using free translation API...")
                prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                translated_transcripts = []
                for i, transcript in enumerate(pool.imap(translator, transcripts)):
                    translated_transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()
                transcripts = translated_transcripts
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    if os.name != "nt" and "Darwin" in os.uname():
        # The default Unix fork start method does not work on macOS;
        # use forkserver instead (the start method can be set only once).
        if multiprocessing.get_start_method(allow_none=True) != 'forkserver':
            multiprocessing.set_start_method('forkserver')

    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
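# With the forkserver (or spawn) start method, worker processes re-import the
# main module, so the entry point must be guarded or the pool will recurse.
# A minimal sketch of how a caller might invoke the variant above (the file
# name and argument values are made up, not part of the original code):
if __name__ == "__main__":
    generate_subtitles("input.mp4",
                       src_language="en",
                       dst_language="en",
                       subtitle_file_format="srt")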
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved "
                             "in the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="en")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="en")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    # Source files are discovered via the config file rather than source_path,
    # so the usual "source path is required" check is skipped here.
    config = get_config("apikey.conf")
    args.api_key = config['apikey']
    args.src_language = config['source_lang']
    args.dst_language = config['dest_lang']
    transpath = config['path']

    entries = os.listdir(transpath)
    for fp in entries:
        # Split the extension from the path and normalise it to lowercase.
        ext = os.path.splitext(fp)[-1].lower()
        rpath = os.path.normpath(os.path.join(transpath, fp))
        print(rpath)
        rightf = ext.endswith(('.mp4', '.mp3', '.m4a', '.wav', '.mov', '.3gp',
                               '.avi', '.ogg', '.webm', '.mkv', '.flv'))
        if not rightf:
            print("{} is an unknown file format.".format(fp))
            continue

        audio_filename, audio_rate = extract_audio(rpath)
        regions = find_speech_regions(audio_filename)
        pool = multiprocessing.Pool(args.concurrency)
        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY)
        transcripts = []
        if regions:
            try:
                widgets = ["Converting speech regions to FLAC files: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                extracted_regions = []
                for i, extracted_region in enumerate(pool.imap(converter, regions)):
                    extracted_regions.append(extracted_region)
                    pbar.update(i)
                pbar.finish()

                widgets = ["Performing speech recognition: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                    transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()

                if not is_same_language(args.src_language, args.dst_language):
                    if args.api_key:
                        google_translate_api_key = args.api_key
                        translator = Translator(args.dst_language,
                                                google_translate_api_key,
                                                dst=args.dst_language,
                                                src=args.src_language)
                        prompt = "Translating from {0} to {1}: ".format(
                            args.src_language, args.dst_language)
                        widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                        pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                        translated_transcripts = []
                        for i, transcript in enumerate(pool.imap(translator, transcripts)):
                            translated_transcripts.append(transcript)
                            pbar.update(i)
                        pbar.finish()
                        transcripts = translated_transcripts
                    else:
                        print("Error: Subtitle translation requires specified "
                              "Google Translate API key. "
                              "See --help for further information.")
                        return 1
            except KeyboardInterrupt:
                pbar.finish()
                pool.terminate()
                pool.join()
                print("Cancelling transcription")
                return 1

        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(args.format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = args.output
        if not dest:
            base, ext = os.path.splitext(rpath)
            dest = "{base}.{format}".format(base=base, format=args.format)

        with open(dest, 'wb') as f:
            f.write(formatted_subtitles.encode("utf-8"))

        print("Subtitles file created at {}".format(dest))
        os.remove(audio_filename)

    return 0
def generate_subtitles(
        source_path,
        src_language,
        listener_progress,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        audio_filename='',
        audio_rate='',
        name=''
):
    regions = find_speech_regions(audio_filename)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            print("Step 1 of 2: Converting speech regions to FLAC files ")
            len_regions = len(regions)
            extracted_regions = []
            subtitles.pool = multiprocessing.Pool(concurrency)
            for i, extracted_region in enumerate(subtitles.pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                print(i)
            subtitles.stop()

            print("Step 2 of 2: Performing speech recognition ")
            subtitles.pool = multiprocessing.Pool(concurrency)
            for i, transcript in enumerate(subtitles.pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                print(i)
            subtitles.stop()
        except KeyboardInterrupt:
            subtitles.pbar.finish()
            subtitles.stop()
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0] + '\\' + name
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    subtitles.stop()
    return dest
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        project_id=None,
        location="global",
        model="default"):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    tpool = concurrent.futures.ThreadPoolExecutor(concurrency)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate, model=model)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(tpool.map(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if project_id:
                    translator = Translator(project_id, location=location,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(tpool.map(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires a specified "
                          "Google Cloud project id. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
def OpenFile(self, filename=None):
    """Open a media file in a MediaPlayer."""
    filename = QtGui.QFileDialog.getOpenFileName(self, 'Open File')
    formats = ['.mp4', '.mkv', '.avi', '.MP4', '.MKV', '.AVI']
    base, ext = os.path.splitext(str(filename))
    self.sub_path = base + '.srt'

    if filename == "" or ext not in formats:
        self.invalid_file()
    else:
        if os.path.isfile(self.sub_path):
            print 'subtitle file already present'
        else:
            audio_filename, audio_rate = extract_audio(str(filename))
            command = ("ffmpeg -i " + str(filename) +
                       " -acodec pcm_s16le -ac 1 -ar 16000 " + base + ".wav")
            print command
            subprocess.call(command, shell=True)
            regions = find_speech_regions(audio_filename)
            pool = multiprocessing.Pool(10)
            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language="en", rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY)
            transcripts = []
            if regions:
                try:
                    widgets = ["Converting speech regions to FLAC files: ",
                               Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                    pbar.finish()

                    widgets = ["Performing speech recognition: ",
                               Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                        transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()

                    if not is_same_language("en", "en"):
                        if self.args.api_key:
                            google_translate_api_key = self.args.api_key
                            translator = Translator(self.args.dst_language,
                                                    google_translate_api_key,
                                                    dst=self.args.dst_language,
                                                    src=self.args.src_language)
                            prompt = "Translating from {0} to {1}: ".format(
                                self.args.src_language, self.args.dst_language)
                            widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                            pbar = ProgressBar(widgets=widgets,
                                               maxval=len(regions)).start()
                            translated_transcripts = []
                            for i, transcript in enumerate(pool.imap(translator, transcripts)):
                                translated_transcripts.append(transcript)
                                pbar.update(i)
                            pbar.finish()
                            transcripts = translated_transcripts
                        else:
                            print ("Error: Subtitle translation requires specified "
                                   "Google Translate API key. "
                                   "See --help for further information.")
                            return 1
                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print "Cancelling transcription"
                    return 1

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
            formatter = FORMATTERS.get("srt")
            formatted_subtitles = formatter(timed_subtitles)
            base, ext = os.path.splitext(str(filename))
            dest = "{base}.{format}".format(base=base, format="srt")
            with open(dest, 'wb') as f:
                f.write(formatted_subtitles.encode("utf-8"))
            print "Subtitles file created at {}".format(dest)

        # Create the media and attach it to the player.
        self.sec = -1
        if sys.version < '3':
            filename = unicode(filename)
        self.media = self.instance.media_new(filename)
        # Put the media in the media player.
        self.mediaplayer.set_media(self.media)
        # Parse the metadata of the file.
        self.media.parse()
        # Set the title of the track as the window title.
        self.setWindowTitle(self.media.get_meta(0))
        # The media player has to be 'connected' to the QFrame
        # (otherwise a video would be displayed in its own window).
        if sys.platform.startswith('linux'):  # for Linux using the X Server
            self.mediaplayer.set_xwindow(self.videoframe.winId())
        elif sys.platform == "win32":  # for Windows
            self.mediaplayer.set_hwnd(self.videoframe.winId())
        elif sys.platform == "darwin":  # for MacOS
            self.mediaplayer.set_nsobject(self.videoframe.winId())

        self.PlayPause()
        self.duration = get_duration_wav(base + ".wav")

        # Count word frequencies in the subtitle text lines (every fourth
        # line of the SRT), skipping common stop words, and fill the combo
        # box with the ten most frequent words.
        top_10 = {}
        pos = 0
        wrds = ['their', 'the', 'a', 'on', 'an', 'i', 'you', 'he', 'she', 'it',
                'we', 'they', 'me', 'him', 'her', 'us', 'them', 'what', 'who',
                'whom', 'mine', 'yours', 'his', 'hers', 'ours', 'theirs',
                'this', 'that', 'these', 'those', 'with', 'at', 'by', 'into',
                'for', 'to', 'up', 'of', 'in', 'is', 'are', 'and', 'as', 'if',
                'from']
        with open(self.sub_path) as f:
            for i in f.readlines():
                pos += 1
                if (pos + 1) % 4 == 0:
                    for j in i.lower().split():
                        if top_10.get(j) is not None:
                            if j not in wrds:
                                top_10[j] += 1
                        else:
                            if j not in wrds:
                                top_10[j] = 1

        top = []
        self.comboBox.clear()
        for key, value in sorted(top_10.iteritems(),
                                 key=lambda (k, v): (v, k), reverse=True):
            print "%s: %s" % (key, value)
            top.append(value)
            self.comboBox.addItem(str(key))
            if len(top) == 10:
                break