Example no. 1
0
def change_sample_rate(input_path, new_sample_rate):
    """
    Resample an audio file to a new sample rate via ffmpeg.

    Parameters
    ----------
    input_path : str
        Path to audio file
    new_sample_rate : int
        Sample rate to convert audio to

    Returns
    -------
    str
        Path of the converted audio
    """
    rate = str(new_sample_rate)
    # Output lives next to the input, tagged with the target rate.
    output_path = add_suffix(input_path, rate)
    command = ["ffmpeg", "-i", input_path, "-ar", rate, output_path]
    check_output(command)
    return output_path
Example no. 2
0
def _save_source_files(text_path, audio_path):
    """Persist the uploaded transcript and audio files.

    The transcript is decoded with the project character encoding
    (undecodable bytes dropped) and newline-normalised to LF before
    being written to ``text_path``; the audio upload is saved verbatim.
    """
    with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
        f.write(request.files["text_file"].read().decode(
            CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))
    request.files["audio_file"].save(audio_path)


def create_dataset_post():
    """Handle the dataset-creation form POST.

    Creates a new dataset (when a name is supplied) or extends an
    existing one: saves the uploaded text/audio files and starts the
    dataset processing in a background progress thread.

    Returns
    -------
    str
        Rendered progress page.

    Raises
    ------
    Exception
        If the requested new dataset name is already taken.
    """
    min_confidence = float(request.form["confidence"])
    language = request.form["language"]
    # Checkbox: present in the form data only when ticked.
    combine_clips = request.form.get("combine_clips") is not None
    min_length = float(request.form["min_length"])
    max_length = float(request.form["max_length"])
    # Prefer Silero when the language is supported; otherwise fall back
    # to a DeepSpeech model shipped under the language folder.
    transcription_model = (
        Silero(language) if language in SILERO_LANGUAGES else DeepSpeech(
            os.path.join(paths["languages"], language, TRANSCRIPTION_MODEL)))
    symbols = get_symbols(language)
    # .srt uploads are kept as subtitles; anything else is plain text.
    text_file = SUBTITLE_FILE if request.files["text_file"].filename.endswith(
        ".srt") else TEXT_FILE

    if request.form["name"]:
        # New dataset: refuse to clobber an existing folder.
        output_folder = os.path.join(paths["datasets"], request.form["name"])
        if os.path.exists(output_folder):
            # NOTE(review): clearing request.files before raising looks
            # like an attempt to drop the uploads — confirm it is needed.
            request.files = None
            raise Exception("Dataset name taken")

        os.makedirs(output_folder, exist_ok=True)
        text_path = os.path.join(output_folder, text_file)
        audio_path = os.path.join(output_folder,
                                  request.files["audio_file"].filename)
        _save_source_files(text_path, audio_path)
        start_progress_thread(
            create_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )
    else:
        # Extend an existing dataset: suffix filenames so the new files
        # cannot collide with files from earlier runs.
        output_folder = os.path.join(paths["datasets"],
                                     request.form["dataset"])
        suffix = get_suffix()
        text_path = os.path.join(output_folder, add_suffix(text_file, suffix))
        audio_path = os.path.join(
            output_folder,
            add_suffix(request.files["audio_file"].filename, suffix))
        _save_source_files(text_path, audio_path)
        start_progress_thread(
            extend_existing_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            suffix=suffix,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )

    return render_template("progress.html",
                           next_url=get_next_url(URLS, request.path))
def extend_existing_dataset(
    text_path,
    audio_path,
    transcription_model,
    output_folder,
    suffix,
    logging=logging,
    min_length=MIN_LENGTH,
    max_length=MAX_LENGTH,
    min_confidence=0.85,
    combine_clips=True,
    symbols=DEFAULT_ALPHABET,
):
    """
    Extends an existing dataset.
    Converts audio to required format, generates clips & produces required files.

    Parameters
    ----------
    text_path : str
        Path to source text
    audio_path : str
        Path to source audio
    transcription_model : TranscriptionModel
        Transcription model
    output_folder : str
        Path to save dataset to
    suffix : str
        String suffix to append to filenames
    logging : logging (optional)
        Logging object to write logs to
    min_length : float (optional)
        Minimum duration of a clip in seconds
    max_length : float (optional)
        Maximum duration of a clip in seconds
    min_confidence : float (optional)
        Minimum confidence score to generate a clip for
    combine_clips : bool (optional)
        Whether to combine clips to make them longer
    symbols : list[str] (optional)
        list of valid symbols default to DEFAULT_ALPHABET

    Raises
    -------
    AssertionError
        If given paths are invalid or clips could not be produced
    """
    assert os.path.isdir(
        output_folder), "Missing existing dataset clips folder"
    logging.info(f"Converting {audio_path}...")
    converted_audio = convert_audio(audio_path)

    forced_alignment_path = os.path.join(output_folder, ALIGNMENT_FILE)
    output_path = os.path.join(output_folder, AUDIO_FOLDER)
    unlabelled_path = os.path.join(output_folder, UNLABELLED_FOLDER)
    label_path = os.path.join(output_folder, METADATA_FILE)
    info_path = os.path.join(output_folder, INFO_FILE)
    # Generate into sibling temp locations first so a mid-run failure
    # cannot corrupt the existing dataset files.
    temp_label_path = label_path.replace(Path(label_path).name, "temp.csv")
    temp_unlabelled_folder = unlabelled_path.replace(
        Path(unlabelled_path).name, "temp_unlabelled")
    temp_wavs_folder = output_path.replace(Path(output_path).name, "temp_wavs")

    clip_generator(
        converted_audio,
        text_path,
        transcription_model,
        forced_alignment_path,
        temp_wavs_folder,
        temp_unlabelled_folder,
        temp_label_path,
        logging=logging,
        symbols=symbols,
        min_length=min_length,
        max_length=max_length,
        min_confidence=min_confidence,
        combine_clips=combine_clips,
    )

    # Open label files with the project encoding (as used when datasets
    # are created) rather than the platform default, so non-ASCII
    # transcriptions round-trip correctly.
    with open(temp_label_path, encoding=CHARACTER_ENCODING) as f:
        new_labels = f.readlines()

    with open(label_path, "a+", encoding=CHARACTER_ENCODING) as f:
        for line in new_labels:
            # Split on the first "|" only: the transcription text may
            # itself contain the delimiter.
            filename, text = line.split("|", 1)
            new_filename = add_suffix(filename, suffix)
            f.write(f"{new_filename}|{text}")

    # Merge new clips into the dataset, suffixing names to avoid
    # collisions with clips from earlier runs.
    for filename in os.listdir(temp_wavs_folder):
        new_filename = add_suffix(filename, suffix)
        shutil.copyfile(os.path.join(temp_wavs_folder, filename),
                        os.path.join(output_path, new_filename))

    for filename in os.listdir(temp_unlabelled_folder):
        new_filename = add_suffix(filename, suffix)
        shutil.copyfile(os.path.join(temp_unlabelled_folder, filename),
                        os.path.join(unlabelled_path, new_filename))

    # Clean up the temp generation artefacts now everything is merged.
    os.remove(temp_label_path)
    shutil.rmtree(temp_wavs_folder)
    shutil.rmtree(temp_unlabelled_folder)
    logging.info("Combined dataset")

    logging.info("Getting dataset info...")
    # Do not pass clip lengths from extend_dataset as we need to get size of entire dataset (not just new clips)
    save_dataset_info(label_path, output_path, info_path)
Example no. 4
0
def test_add_suffix():
    """add_suffix should insert the suffix before the file extension."""
    assert add_suffix("audio.wav", "converted") == "audio-converted.wav"