Esempio n. 1
0
def plot_mels(base_dir: Path,
              ds_name: str,
              wav_name: str,
              custom_hparams: Optional[Dict[str, str]] = None) -> None:
    """Create mel-spectrogram plots for one wav set of a dataset.

    When the plots directory already exists, a notice is printed and nothing
    else happens. Otherwise the wav and dataset data are loaded, handed to
    ``process`` together with a ``save_plot`` callback, and the returned
    paths are grouped with ``make_batches_h_v`` and passed to
    ``plot_batches_h_v``.

    Args:
        base_dir: Root directory handed to ``get_ds_dir``.
        ds_name: Name of the dataset.
        wav_name: Name of the wav set; also used to locate the plots directory.
        custom_hparams: Optional overrides forwarded unchanged to ``process``.

    Raises:
        AssertionError: If the wav directory does not exist or the loaded
            wav data is empty.
    """
    print("Plotting wav mel spectograms...")
    dataset_dir = get_ds_dir(base_dir, ds_name)
    target_dir = get_plots_dir(dataset_dir, wav_name)

    # Guard clause: existing plots are never regenerated.
    if target_dir.is_dir():
        print("Already exists.")
        return

    source_dir = get_wav_dir(dataset_dir, wav_name)
    assert source_dir.is_dir()
    wav_entries = load_wav_data(source_dir)
    ds_entries = load_ds_data(dataset_dir)
    assert len(wav_entries) > 0

    # Bind the destination and the total count so ``process`` only has to
    # supply the per-item arguments of ``save_plot``.
    on_save = partial(save_plot, dest_dir=target_dir, data_len=len(wav_entries))
    plot_paths = process(wav_entries, ds_entries, source_dir, custom_hparams,
                         on_save)

    grid = make_batches_h_v(plot_paths, VERTICAL_COUNT, HORIZONTAL_COUNT)
    plot_batches_h_v(grid, target_dir)
Esempio n. 2
0
def text_stats(base_dir: Path, ds_name: str, text_name: str):
    """Log statistics for a text set of a dataset.

    The dataset and text data are loaded and passed to ``log_stats``. If the
    text directory does not exist, the function returns silently after the
    initial log line.

    Args:
        base_dir: Root directory handed to ``get_ds_dir``.
        ds_name: Name of the dataset.
        text_name: Name of the text set whose stats are logged.
    """
    log = getLogger(__name__)
    log.info(f"Stats of {text_name}")

    dataset_dir = get_ds_dir(base_dir, ds_name)
    source_dir = get_text_dir(dataset_dir, text_name)
    if not source_dir.is_dir():
        return

    log_stats(load_ds_data(dataset_dir), load_text_data(source_dir))
Esempio n. 3
0
def merge_to_final_ds(base_dir: Path, ds_name: str, text_name: str,
                      audio_name: str, final_name: str,
                      overwrite: bool) -> FinalDsEntryList:
    """Merge dataset, text, wav and mel data into a final dataset directory.

    The four inputs are loaded, combined with ``get_final_ds_from_data`` and
    written to the final directory with ``save_final_ds`` and
    ``save_analysis_df``. An existing final directory is only replaced when
    ``overwrite`` is true; otherwise the function logs a notice and returns.

    Args:
        base_dir: Root directory handed to ``get_ds_dir``.
        ds_name: Name of the dataset.
        text_name: Name of the text set to merge.
        audio_name: Name used to locate both the wav and the mel directory.
        final_name: Name of the final dataset directory to create.
        overwrite: Whether an existing final directory may be removed.

    Returns:
        NOTE(review): no code path returns a value, so callers currently
        receive ``None`` despite the annotation -- confirm the intended
        contract before relying on the return value.

    Raises:
        Exception: If the dataset, text, wav or mel directory is missing.
    """
    logger = getLogger(__name__)
    ds_dir = get_ds_dir(base_dir, ds_name)
    final_dir = get_final_dir(ds_dir, final_name)

    if final_dir.is_dir() and not overwrite:
        logger.info("Directory already exists!")
        return

    def require_dir(path: Path, msg: str) -> None:
        """Log and raise ``Exception(msg)`` unless ``path`` is a directory."""
        # ``is_dir()`` is already False for a missing path, so a separate
        # ``exists()`` check adds nothing.
        if not path.is_dir():
            # ``logger.exception`` is meant for use inside an ``except``
            # handler; outside one it appends a bogus "NoneType: None"
            # traceback, so a plain error record is logged instead.
            logger.error(msg)
            raise Exception(msg)

    require_dir(ds_dir, "Dataset not found!")

    text_dir = get_text_dir(ds_dir, text_name)
    require_dir(text_dir, "Text data not found!")

    wav_dir = get_wav_dir(ds_dir, audio_name)
    require_dir(wav_dir, "Wav data not found!")

    mel_dir = get_mel_dir(ds_dir, audio_name)
    require_dir(mel_dir, "Mel data not found!")

    ds_data = load_ds_data(ds_dir)
    text_data = load_text_data(text_dir)
    wav_data = load_wav_data(wav_dir)
    mel_data = load_mel_data(mel_dir)

    final_data = get_final_ds_from_data(
        ds_data=ds_data,
        text_data=text_data,
        wav_data=wav_data,
        mel_data=mel_data,
        wav_dir=wav_dir,
        mel_dir=mel_dir,
    )

    # The old directory is removed only after the merge succeeded, so a
    # failure above leaves existing data untouched.
    if final_dir.is_dir():
        assert overwrite
        logger.info("Overwriting existing data.")
        rmtree(final_dir)
    final_dir.mkdir(parents=True, exist_ok=False)

    save_final_ds(final_dir, final_data)
    save_analysis_df(final_dir, final_data)
    logger.info("Done.")
Esempio n. 4
0
def wavs_stats(base_dir: Path, ds_name: str, wav_name: str) -> None:
  """Log statistics for a wav set of a dataset.

  The dataset and wav data are loaded and passed to ``log_stats``. If the
  wav directory does not exist, the function returns silently after the
  initial log line.

  Args:
    base_dir: Root directory handed to ``get_ds_dir``.
    ds_name: Name of the dataset.
    wav_name: Name of the wav set whose stats are logged.
  """
  log = getLogger(__name__)
  log.info(f"Stats of {wav_name}")

  dataset_dir = get_ds_dir(base_dir, ds_name)
  source_dir = get_wav_dir(dataset_dir, wav_name)
  if not source_dir.is_dir():
    return

  log_stats(load_ds_data(dataset_dir), load_wav_data(source_dir))
Esempio n. 5
0
def preprocess_wavs(base_dir: Path, ds_name: str, wav_name: str, overwrite: bool = False) -> None:
  """Preprocess the wavs of a dataset into a new wav directory.

  The dataset data is loaded, run through ``preprocess`` into the
  destination directory, saved with ``save_wav_data`` and summarised with
  ``log_stats``. An existing destination is only replaced when ``overwrite``
  is true; otherwise an error is logged and the function returns.

  Args:
    base_dir: Root directory handed to ``get_ds_dir``.
    ds_name: Name of the dataset.
    wav_name: Name of the wav directory to create.
    overwrite: Whether an existing wav directory may be removed.
  """
  logger = getLogger(__name__)
  logger.info("Preprocessing wavs...")
  ds_dir = get_ds_dir(base_dir, ds_name)
  dest_wav_dir = get_wav_dir(ds_dir, wav_name)
  if dest_wav_dir.is_dir() and not overwrite:
    logger.error("Already exists.")
    return

  data = load_ds_data(ds_dir)

  if dest_wav_dir.is_dir():
    assert overwrite
    logger.info("Overwriting existing data.")
    rmtree(dest_wav_dir)
  dest_wav_dir.mkdir(exist_ok=False, parents=True)

  # Leave one CPU free, but never request fewer than one job: a plain
  # ``cpu_count() - 1`` is 0 on a single-CPU host, and ``cpu_count()`` may be
  # None when the count cannot be determined.
  n_jobs = max(1, (cpu_count() or 1) - 1)
  wav_data = preprocess(data, dest_wav_dir, n_jobs=n_jobs)
  save_wav_data(dest_wav_dir, wav_data)
  # NOTE(review): the dataset data is loaded a second time although ``data``
  # above came from the same call -- confirm ``preprocess`` does not mutate
  # it before removing this reload.
  ds_data = load_ds_data(ds_dir)
  log_stats(ds_data, wav_data)
Esempio n. 6
0
def __wav_op(base_dir: Path, ds_name: str, origin_wav_name: str, destination_wav_name: str,
             op: Callable[[WavDataList, Path, Path], WavDataList], overwrite: bool) -> None:
  """Apply a wav operation to one wav set and store the result as another.

  The origin wav data is loaded and ``op`` is called with that data, the
  origin directory and the freshly created destination directory. The
  returned data is saved with ``save_wav_data`` and summarised with
  ``log_stats``. An existing destination is only replaced when ``overwrite``
  is true; otherwise an error is logged and the function returns.

  Args:
    base_dir: Root directory handed to ``get_ds_dir``.
    ds_name: Name of the dataset.
    origin_wav_name: Name of the wav set to read from.
    destination_wav_name: Name of the wav set to create.
    op: Callable receiving (wav data, origin dir, destination dir) and
      returning the new wav data.
    overwrite: Whether an existing destination directory may be removed.

  Raises:
    AssertionError: If the origin wav directory does not exist.
  """
  log = getLogger(__name__)
  dataset_dir = get_ds_dir(base_dir, ds_name)
  target_dir = get_wav_dir(dataset_dir, destination_wav_name)

  if target_dir.is_dir():
    if not overwrite:
      log.error("Already exists.")
      return

  source_dir = get_wav_dir(dataset_dir, origin_wav_name)
  assert source_dir.is_dir()
  source_entries = load_wav_data(source_dir)

  # The destination is cleared only after the origin data loaded successfully.
  if target_dir.is_dir():
    assert overwrite
    log.info("Overwriting existing data.")
    rmtree(target_dir)
  target_dir.mkdir(parents=True, exist_ok=False)

  result = op(source_entries, source_dir, target_dir)
  save_wav_data(target_dir, result)
  log_stats(load_ds_data(dataset_dir), result)
Esempio n. 7
0
def preprocess_text(base_dir: Path, ds_name: str, text_name: str,
                    overwrite: bool) -> None:
    """Preprocess the text of a dataset and store it in a text directory.

    The dataset data is loaded and run through ``preprocess``; the result is
    written to the text directory by ``save_text_data``,
    ``save_symbols_stats_df``, ``save_whole_text`` and ``save_analytics_df``.
    An existing text directory is only replaced when ``overwrite`` is true;
    otherwise an error is logged and the function returns.

    Args:
        base_dir: Root directory handed to ``get_ds_dir``.
        ds_name: Name of the dataset.
        text_name: Name of the text directory to create.
        overwrite: Whether an existing text directory may be removed.
    """
    log = getLogger(__name__)
    log.info("Preprocessing text...")
    dataset_dir = get_ds_dir(base_dir, ds_name)
    out_dir = get_text_dir(dataset_dir, text_name)

    if out_dir.is_dir():
        if not overwrite:
            log.error("Already exists.")
            return

    processed = preprocess(load_ds_data(dataset_dir))

    # The old directory is removed only after preprocessing succeeded.
    if out_dir.is_dir():
        assert overwrite
        log.info("Overwriting existing data.")
        rmtree(out_dir)
    out_dir.mkdir(exist_ok=False, parents=True)

    save_text_data(out_dir, processed)
    save_symbols_stats_df(out_dir, processed)
    save_whole_text(out_dir, processed)
    save_analytics_df(out_dir, processed)