def test_hifigan_synthesis():
    hifigan_model_path = os.path.join("test_samples", "hifigan.pt")
    hifigan_config_path = os.path.join("test_samples", "config.json")
    audio_path = "synthesized_audio.wav"
    transcription_model = Silero()
    hifigan = Hifigan(hifigan_model_path, hifigan_config_path)
    text = "the monkeys live"
    synthesize(
        model=FakeModelForSynthesis(),
        text=text,
        graph_path=None,
        audio_path=audio_path,
        vocoder=hifigan,
    )
    assert os.path.isfile(audio_path)
    assert similarity(text, transcription_model.transcribe(audio_path)) > MIN_SYNTHESIS_SCORE
    os.remove(audio_path)
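
# Hypothetical sketch (not the project's similarity() implementation): the
# assertion above only needs a 0..1 text-similarity score to compare against
# MIN_SYNTHESIS_SCORE, which a plain difflib ratio is enough to illustrate.
def _similarity_sketch(expected: str, transcribed: str) -> float:
    from difflib import SequenceMatcher

    return SequenceMatcher(None, expected.lower(), transcribed.lower()).ratio()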
def create_dataset_post():
    min_confidence = float(request.form["confidence"])
    language = request.form["language"]
    combine_clips = request.form.get("combine_clips") is not None
    min_length = float(request.form["min_length"])
    max_length = float(request.form["max_length"])
    transcription_model = (
        Silero(language)
        if language in SILERO_LANGUAGES
        else DeepSpeech(os.path.join(paths["languages"], language, TRANSCRIPTION_MODEL))
    )
    symbols = get_symbols(language)
    text_file = SUBTITLE_FILE if request.files["text_file"].filename.endswith(".srt") else TEXT_FILE

    if request.form["name"]:
        # Create a brand-new dataset under the given name
        output_folder = os.path.join(paths["datasets"], request.form["name"])
        if os.path.exists(output_folder):
            request.files = None
            raise Exception("Dataset name taken")
        os.makedirs(output_folder, exist_ok=True)
        text_path = os.path.join(output_folder, text_file)
        audio_path = os.path.join(output_folder, request.files["audio_file"].filename)
        with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
            f.write(request.files["text_file"].read().decode(CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))
        request.files["audio_file"].save(audio_path)
        start_progress_thread(
            create_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )
    else:
        # Extend an existing dataset, suffixing the new files to avoid name collisions
        output_folder = os.path.join(paths["datasets"], request.form["dataset"])
        suffix = get_suffix()
        text_path = os.path.join(output_folder, add_suffix(text_file, suffix))
        audio_path = os.path.join(output_folder, add_suffix(request.files["audio_file"].filename, suffix))
        with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
            f.write(request.files["text_file"].read().decode(CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))
        request.files["audio_file"].save(audio_path)
        start_progress_thread(
            extend_existing_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            suffix=suffix,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )

    return render_template("progress.html", next_url=get_next_url(URLS, request.path))
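
# Hypothetical usage sketch (not part of the app): how a client could post the
# form fields read by create_dataset_post() above. The host and route path are
# assumptions for illustration; only the field and file names come from the
# handler itself.
def _example_create_dataset_request():
    import requests

    with open("transcript.srt", "rb") as text_upload, open("speech.wav", "rb") as audio_upload:
        return requests.post(
            "http://localhost:5000/create-dataset",  # assumed host and route
            data={
                "confidence": "0.85",
                "language": "English",
                "min_length": "1.0",
                "max_length": "10.0",
                "combine_clips": "on",  # omit this key entirely to disable combining
                "name": "my-dataset",  # or "dataset": "<existing name>" to extend one
            },
            files={"text_file": text_upload, "audio_file": audio_upload},
        )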
"--output_folder", help="Path to save dataset", type=str, default="wavs") parser.add_argument("-s", "--suffix", help="String suffix for added files", type=str, required=True) parser.add_argument("-l", "--language", help="The language to use", type=str, default="English") parser.add_argument("-s", "--symbol_path", help="Path to symbol/alphabet file", type=str, default=None) args = parser.parse_args() extend_existing_dataset( text_path=args.text_path, audio_path=args.audio_path, transcription_model=Silero(args.language), output_folder=args.output_folder, suffix=args.suffix, symbols=load_symbols(args.symbol_path) if args.symbol_path else DEFAULT_ALPHABET, )
def test_silero():
    transcription_model = Silero()
    audio_path = os.path.join("test_samples", "audio.wav")
    transcription = transcription_model.transcribe(audio_path)
    assert similarity(TEXT, transcription) > MIN_SYNTHESIS_SCORE