def test_hifigan_synthesis():
    hifigan_model_path = os.path.join("test_samples", "hifigan.pt")
    hifigan_config_path = os.path.join("test_samples", "config.json")
    audio_path = "synthesized_audio.wav"
    transcription_model = Silero()

    hifigan = Hifigan(hifigan_model_path, hifigan_config_path)
    text = "the monkeys live"
    synthesize(
        model=FakeModelForSynthesis(),
        text=text,
        graph_path=None,
        audio_path=audio_path,
        vocoder=hifigan,
    )

    assert os.path.isfile(audio_path)
    assert similarity(
        text, transcription_model.transcribe(audio_path)) > MIN_SYNTHESIS_SCORE

    os.remove(audio_path)
Example #2
0
def create_dataset_post():
    min_confidence = float(request.form["confidence"])
    language = request.form["language"]
    combine_clips = request.form.get("combine_clips") is not None
    min_length = float(request.form["min_length"])
    max_length = float(request.form["max_length"])
    transcription_model = (
        Silero(language) if language in SILERO_LANGUAGES else DeepSpeech(
            os.path.join(paths["languages"], language, TRANSCRIPTION_MODEL)))
    symbols = get_symbols(language)
    text_file = SUBTITLE_FILE if request.files["text_file"].filename.endswith(
        ".srt") else TEXT_FILE

    if request.form["name"]:
        output_folder = os.path.join(paths["datasets"], request.form["name"])
        if os.path.exists(output_folder):
            request.files = None
            raise Exception("Dataset name taken")

        os.makedirs(output_folder, exist_ok=True)
        text_path = os.path.join(output_folder, text_file)
        audio_path = os.path.join(output_folder,
                                  request.files["audio_file"].filename)

        with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
            f.write(request.files["text_file"].read().decode(
                CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))
        request.files["audio_file"].save(audio_path)
        start_progress_thread(
            create_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )
    else:
        output_folder = os.path.join(paths["datasets"],
                                     request.form["dataset"])
        suffix = get_suffix()
        text_path = os.path.join(output_folder, add_suffix(text_file, suffix))
        audio_path = os.path.join(
            output_folder,
            add_suffix(request.files["audio_file"].filename, suffix))

        with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
            f.write(request.files["text_file"].read().decode(
                CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))
        request.files["audio_file"].save(audio_path)

        start_progress_thread(
            extend_existing_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            suffix=suffix,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )

    return render_template("progress.html",
                           next_url=get_next_url(URLS, request.path))
                        "--output_folder",
                        help="Path to save dataset",
                        type=str,
                        default="wavs")
    parser.add_argument("-s",
                        "--suffix",
                        help="String suffix for added files",
                        type=str,
                        required=True)
    parser.add_argument("-l",
                        "--language",
                        help="The language to use",
                        type=str,
                        default="English")
    parser.add_argument("-s",
                        "--symbol_path",
                        help="Path to symbol/alphabet file",
                        type=str,
                        default=None)
    args = parser.parse_args()

    extend_existing_dataset(
        text_path=args.text_path,
        audio_path=args.audio_path,
        transcription_model=Silero(args.language),
        output_folder=args.output_folder,
        suffix=args.suffix,
        symbols=load_symbols(args.symbol_path)
        if args.symbol_path else DEFAULT_ALPHABET,
    )
Example #4
0
def test_silero():
    transcription_model = Silero()

    audio_path = os.path.join("test_samples", "audio.wav")
    transcription = transcription_model.transcribe(audio_path)
    assert similarity(TEXT, transcription) > MIN_SYNTHESIS_SCORE