예제 #1
0
def join_audios(base_dir: Path, directory: Path, silence_files: float,
                silence_directories: float, output_file: Path,
                dirs_before_files: bool, overwrite: bool) -> bool:
    """Concatenate the mono ``.wav`` files found below ``directory`` into one file.

    Files are read chunk by chunk in the order delivered by
    ``get_files_sorted_recursive``. Files that are not mono, or whose sampling
    rate differs from the first accepted file, are logged and skipped.

    Args:
        base_dir: Not used by this function; kept for interface compatibility.
        directory: Directory that is searched for ``.wav`` files.
        silence_files: Amount of silence inserted after every accepted file
            that is not the last one of its chunk (no silence if ``<= 0``).
            NOTE(review): presumably seconds -- confirm against
            ``get_sample_count``.
        silence_directories: Amount of silence inserted after every non-empty
            chunk that is not the last one (no silence if ``<= 0``).
        output_file: Path of the resulting wav file; parent folders are
            created if necessary.
        dirs_before_files: Forwarded (negated) to
            ``get_files_sorted_recursive``.
        overwrite: Allow replacing an already existing ``output_file``.

    Returns:
        ``True`` if the output file was written, ``False`` if the directory
        does not exist, the output exists and must not be overwritten, or no
        usable audio file was found.
    """
    logger = getLogger(__name__)
    if not directory.is_dir():
        logger.error("Directory was not found!")
        return False

    if output_file.exists() and not overwrite:
        logger.error("File already exists!")
        return False

    all_files = get_files_sorted_recursive(directory, not dirs_before_files)
    all_wav_files = tuple(
        tuple(file for file in file_chunk if file.suffix.lower() == ".wav")
        for file_chunk in all_files)

    final_parts = []
    # Sampling rate of the first accepted file; every later file must match.
    global_sr = None
    for chunk_nr, file_chunk in enumerate(all_wav_files, start=1):
        if not file_chunk:
            continue
        is_last_chunk = chunk_nr == len(all_wav_files)
        for file_nr, file in enumerate(file_chunk, start=1):
            is_last_file = file_nr == len(file_chunk)
            wav, wav_sr = wav_to_float32(file)
            if len(wav.shape) != 1:
                logger.error(
                    f"File {file} is not mono. Only mono files are supported! Skipped."
                )
                continue

            if global_sr is None:
                global_sr = wav_sr
            elif wav_sr != global_sr:
                logger.error(
                    f"File {file} has another sampling rate than the first file. Skipped."
                )
                continue

            final_parts.append(wav)
            if not is_last_file and silence_files > 0:
                pause_samples = np.zeros(
                    (get_sample_count(global_sr, silence_files), ))
                final_parts.append(pause_samples)

        # Use the output sampling rate for the pause, not the rate of the file
        # read last (which may have been skipped because its rate differs).
        # Without any accepted file there is nothing to separate yet.
        if (not is_last_chunk and silence_directories > 0
                and global_sr is not None):
            pause_samples = np.zeros(
                (get_sample_count(global_sr, silence_directories), ))
            final_parts.append(pause_samples)

    if not final_parts:
        # np.concatenate raises ValueError on an empty sequence.
        logger.error("No valid audio files were found!")
        return False

    logger.debug("Concatenating...")
    result = np.concatenate(tuple(final_parts), axis=-1)

    logger.debug("Saving...")
    output_file.parent.mkdir(parents=True, exist_ok=True)
    float_to_wav(result, output_file, sample_rate=global_sr)
    logger.info(f"Done. Written output to: {output_file.absolute()}")
    return True
예제 #2
0
def save_results(output: InferenceEntryOutput, infer_dir: str,
                 denoised_audio_wav_paths: List[Dict[str, Any]]):
    """Write all artifacts of one inference entry and register its wav info.

    Creates the entry's directory (as returned by ``get_inferred_mel_dir``)
    and stores there three mel images, two mel arrays, the denoised and the
    raw inferred wav, and a vertically stacked comparison image. Finally the
    wav info dict of the denoised wav is appended to
    ``denoised_audio_wav_paths``, which is modified in place.
    """
    dest_dir = get_inferred_mel_dir(infer_dir, output.identifier)
    os.makedirs(dest_dir, exist_ok=True)

    def in_dest(filename: str) -> str:
        # All artifacts live directly inside the entry's directory.
        return os.path.join(dest_dir, filename)

    original_png = in_dest("original.png")
    denoised_png = in_dest("inferred_denoised.png")
    diff_png = in_dest("diff.png")
    inferred_denoised_path = in_dest("inferred_denoised.wav")

    # Mel spectrogram images.
    imageio.imsave(original_png, output.mel_orig_img)
    imageio.imsave(denoised_png, output.mel_inferred_denoised_img)
    imageio.imsave(diff_png, output.mel_denoised_diff_img)

    # Mel spectrogram arrays.
    np.save(in_dest("original.mel.npy"), output.mel_orig)
    np.save(in_dest("inferred_denoised.mel.npy"), output.mel_inferred_denoised)

    # Audio, with and without denoising.
    float_to_wav(output.wav_inferred_denoised, inferred_denoised_path,
                 sample_rate=output.inferred_sr)
    float_to_wav(output.wav_inferred, in_dest("inferred.wav"),
                 sample_rate=output.inferred_sr)

    stack_images_vertically(
        list_im=[original_png, denoised_png, diff_png],
        out_path=in_dest("comparison.png"),
    )

    denoised_audio_wav_paths.append(
        get_wav_info_dict(
            identifier=output.identifier,
            path=inferred_denoised_path,
            sr=output.inferred_sr,
        ))
예제 #3
0
def save_results(entry: PreparedData, output: ValidationEntryOutput, val_dir: str, iteration: int):
  """Write all artifacts of one validation entry into its directory.

  The directory is obtained from ``get_val_entry_dir``. Stored are three mel
  images, two mel arrays, the original, denoised and raw inferred wav, and a
  vertically stacked comparison image.
  """
  dest_dir = get_val_entry_dir(val_dir, entry, iteration)

  def in_dest(filename: str) -> str:
    # All artifacts live directly inside the entry's directory.
    return os.path.join(dest_dir, filename)

  original_png = in_dest("original.png")
  denoised_png = in_dest("inferred_denoised.png")
  diff_png = in_dest("diff.png")

  # Mel spectrogram images.
  imageio.imsave(original_png, output.mel_orig_img)
  imageio.imsave(denoised_png, output.mel_inferred_denoised_img)
  imageio.imsave(diff_png, output.mel_denoised_diff_img)

  # Mel spectrogram arrays.
  np.save(in_dest("original.mel.npy"), output.mel_orig)
  np.save(in_dest("inferred_denoised.mel.npy"), output.mel_inferred_denoised)

  # Audio: the original uses its own sampling rate, both inferred ones share one.
  float_to_wav(output.wav_orig, in_dest("original.wav"), sample_rate=output.orig_sr)
  float_to_wav(output.wav_inferred_denoised, in_dest("inferred_denoised.wav"),
               sample_rate=output.inferred_sr)
  float_to_wav(output.wav_inferred, in_dest("inferred.wav"),
               sample_rate=output.inferred_sr)

  stack_images_vertically(
    list_im=[original_png, denoised_png, diff_png],
    out_path=in_dest("comparison.png"),
  )
예제 #4
0
def save_val_wav(val_dir: str, sampling_rate: int, wav) -> str:
    """Write ``wav`` to the path given by ``get_val_wav_path(val_dir)``.

    Returns:
        The path the wav was written to.
    """
    target_path = get_val_wav_path(val_dir)
    float_to_wav(
        wav,
        target_path,
        sample_rate=sampling_rate,
    )
    return target_path
예제 #5
0
def save_infer_wav(infer_dir: str, sampling_rate: int, wav: np.ndarray):
    """Write ``wav`` into ``infer_dir``.

    The file name is ``get_parent_dirname(infer_dir)`` with a ``.wav`` suffix.
    """
    wav_filename = f"{get_parent_dirname(infer_dir)}.wav"
    target_path = os.path.join(infer_dir, wav_filename)
    float_to_wav(wav, target_path, sample_rate=sampling_rate)
예제 #6
0
def _infer(infer_dir: str, run_name: str, checkpoint_path: str,
           mel_entries: List[InferMelEntry], sigma: float,
           denoiser_strength: float, sentence_pause_s: Optional[float],
           custom_hparams: Optional[Dict[str, str]], no_concatenation: bool,
           seed: int, copy_wav_info_to: Optional[str]):
    """Run inference for ``mel_entries`` and write all results to ``infer_dir``.

    Loads the checkpoint, lets ``infer_core`` process the entries (each entry
    is stored through ``save_results``), optionally writes the concatenated
    denoised wav, creates the overview plots and statistics, and finally
    writes the collected wav infos, optionally copying that file to
    ``copy_wav_info_to``. Progress is logged to ``log.txt`` in ``infer_dir``.
    """
    log_path = os.path.join(infer_dir, "log.txt")
    logger = prepare_logger(log_path)

    checkpoint = CheckpointWaveglow.load(checkpoint_path, logger)
    should_concatenate = not no_concatenation

    # Filled in place by the save callback, once per processed entry.
    collected_wav_infos: List[Dict[str, Any]] = []
    on_entry_inferred = partial(
        save_results,
        infer_dir=infer_dir,
        denoised_audio_wav_paths=collected_wav_infos,
    )

    inference_results, complete = infer_core(
        mel_entries=mel_entries,
        checkpoint=checkpoint,
        custom_hparams=custom_hparams,
        denoiser_strength=denoiser_strength,
        sigma=sigma,
        sentence_pause_s=sentence_pause_s,
        logger=logger,
        save_callback=on_entry_inferred,
        concatenate=should_concatenate,
        seed=seed,
    )

    if should_concatenate:
        complete_wav_denoised, complete_wav_denoised_sr = complete
        assert complete_wav_denoised is not None
        assert complete_wav_denoised_sr is not None
        complete_path = os.path.join(infer_dir, "complete_denoised.wav")
        float_to_wav(complete_wav_denoised,
                     complete_path,
                     sample_rate=complete_wav_denoised_sr)

    # Overview artifacts; each step is announced before it runs.
    report_steps = (
        ("Creating mel_inferred_denoised_v.png", mel_inferred_denoised_v_plot),
        ("Creating mel_inferred_denoised_h.png", mel_inferred_denoised_h_plot),
        ("Creating total.csv", save_stats),
    )
    for announcement, create_report in report_steps:
        logger.info(announcement)
        create_report(infer_dir, inference_results)

    wav_paths_json = save_denoised_audio_wav_paths(
        infer_dir=infer_dir,
        name=run_name,
        denoised_audio_wav_paths=collected_wav_infos,
    )

    logger.info(
        "Wrote all inferred mel paths including sampling rate into these file(s):"
    )
    logger.info(wav_paths_json)

    wants_copy = copy_wav_info_to is not None
    if wants_copy:
        create_parent_folder(copy_wav_info_to)
        copyfile(wav_paths_json, copy_wav_info_to)
        logger.info(copy_wav_info_to)

    logger.info(f"Saved output to: {infer_dir}")