def main():
    """Entry point: transcribe a video file into an SRT subtitle file.

    Parses CLI arguments, loads the DeepSpeech model (optionally with an
    external scorer), extracts and silence-splits the audio, runs
    inference on every segment, and appends the cues to
    output/<video name>.srt.

    Raises:
        SystemExit: if the --model argument is not a .pbmm file.
    """
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True, help='DeepSpeech model file')
    parser.add_argument('--scorer', help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True, help='Input video file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        # Equivalent to exit(1) but does not depend on the `site` builtin.
        raise SystemExit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    # basename/splitext handle both '/' and '\' separators, unlike the
    # previous split("/") which broke on Windows paths.
    video_file_name = os.path.splitext(os.path.basename(input_file))[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_file_name = os.path.join(output_directory, video_file_name + ".srt")

    # Extract audio from input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    print("\nRunning inference:")
    # "a+" keeps the original append semantics; `with` guarantees the
    # handle is closed even if inference raises mid-loop.
    with open(srt_file_name, "a+") as file_handle:
        for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
            audio_segment_path = os.path.join(audio_directory, file)
            # Don't run inference on the original (unsegmented) audio file
            if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
                ds_process_audio(ds, audio_segment_path, file_handle)

    print("\nSRT file saved to", srt_file_name)
def main():
    """Entry point: transcribe a video file into an SRT or VTT subtitle file.

    Parses CLI arguments, loads the DeepSpeech model (optionally with an
    external scorer), extracts and silence-splits the audio, runs
    inference on every segment, writes the cues to
    output/<video name>.srt (or .vtt when --vtt is given), and finally
    clears the audio/ working directory.

    Raises:
        SystemExit: if the --model argument is not a .pbmm file.
    """
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True, help='DeepSpeech model file')
    parser.add_argument('--scorer', help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True, help='Input video file')
    parser.add_argument('--vtt', dest="vtt", action="store_true",
                        help='Output a vtt file with cue points for individual words instead of a srt file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        # Equivalent to exit(1) but does not depend on the `site` builtin.
        raise SystemExit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    # basename/splitext handle both '/' and '\' separators (split(os.sep)
    # misses forward slashes on Windows) and strip only the final
    # extension instead of truncating at the first dot in the name.
    video_file_name = os.path.splitext(os.path.basename(input_file))[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_extension = ".vtt" if args.vtt else ".srt"
    srt_file_name = os.path.join(output_directory, video_file_name + srt_extension)

    # Extract audio from input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    # Output SRT or VTT file. "a+" keeps the original append semantics;
    # `with` guarantees the handle is closed even if inference raises.
    with open(srt_file_name, "a+") as file_handle:
        if args.vtt:
            # Mandatory WebVTT header block, terminated by a blank line.
            file_handle.write("WEBVTT\n")
            file_handle.write("Kind: captions\n\n")

        print("\nRunning inference:")
        for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
            audio_segment_path = os.path.join(audio_directory, file)
            # Don't run inference on the original (unsegmented) audio file
            if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
                ds_process_audio(ds, audio_segment_path, file_handle, args.vtt)

        if not args.vtt:
            print("\nSRT file saved to", srt_file_name)
        else:
            print("\nVTT file saved to", srt_file_name)

    # Clean audio/ directory: remove all segment files, then recreate it
    # empty for the next run.
    shutil.rmtree(audio_directory)
    os.mkdir(audio_directory)