Example #1
import argparse
import logging
import os

import numpy as np
import yaml
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Project-local helpers; import paths assumed from the TensorFlowTTS layout.
from tensorflow_tts.datasets import AudioDataset, MelDataset
from tensorflow_tts.utils import remove_outlier


def main():
    """Run preprocessing process."""
    parser = argparse.ArgumentParser(
        description="Compute mean and variance of dumped raw features "
                    "(See detail in tensorflow_tts/bin/compute_statistics.py).")
    parser.add_argument("--rootdir", type=str, required=True,
                        help="directory including feature files. ")
    parser.add_argument("--config", type=str, required=True,
                        help="yaml format configuration file.")
    parser.add_argument("--outdir", default=None, type=str, required=True,
                        help="directory to save statistics.")
    parser.add_argument("--verbose", type=int, default=1,
                        help="logging level. higher is more logging. (default=1)")
    args = parser.parse_args()

    # set logger
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    else:
        logging.basicConfig(
            level=logging.WARN, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
        logging.warning('Skip DEBUG/INFO messages')

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))

    # check directory existence
    if args.outdir is None:
        args.outdir = os.path.dirname(args.rootdir)
    os.makedirs(args.outdir, exist_ok=True)

    # get dataset
    if config["format"] == "npy":
        mel_query = "*-raw-feats.npy"
        f0_query = "*-raw-f0.npy"
        energy_query = "*-raw-energy.npy"
        mel_load_fn = np.load
    else:
        raise ValueError("Support only npy format.")

    dataset = MelDataset(
        args.rootdir,
        mel_query=mel_query,
        mel_load_fn=mel_load_fn
    ).create(batch_size=1)

    # calculate statistics
    scaler = StandardScaler()
    for mel, mel_length in tqdm(dataset):
        mel = mel[0].numpy()
        scaler.partial_fit(mel)

    # save to file
    stats = np.stack([scaler.mean_, scaler.scale_], axis=0)
    np.save(os.path.join(args.outdir, "stats.npy"), stats.astype(np.float32), allow_pickle=False)

    # calculate statistic of f0
    f0_dataset = AudioDataset(
        args.rootdir,
        audio_query=f0_query,
        audio_load_fn=np.load,
    ).create(batch_size=1)

    pitch_vecs = []
    for f0, f0_length in tqdm(f0_dataset):
        f0 = f0[0].numpy()  # [T]
        f0 = remove_outlier(f0)
        pitch_vecs.append(f0)
    nonzeros = np.concatenate([v[v != 0.0] for v in pitch_vecs])
    mean, std = np.mean(nonzeros), np.std(nonzeros)

    # save to file
    stats = np.stack([mean, std], axis=0)
    np.save(os.path.join(args.outdir, "stats_f0.npy"), stats.astype(np.float32), allow_pickle=False)

    # calculate statistic of energy
    energy_dataset = AudioDataset(
        args.rootdir,
        audio_query=energy_query,
        audio_load_fn=np.load,
    ).create(batch_size=1)

    energy_vecs = []
    for e, e_length in tqdm(energy_dataset):
        e = e[0].numpy()
        e = remove_outlier(e)
        energy_vecs.append(e)
    nonzeros = np.concatenate([v[v != 0.0] for v in energy_vecs])
    mean, std = np.mean(nonzeros), np.std(nonzeros)

    # save to file
    stats = np.stack([mean, std], axis=0)
    np.save(os.path.join(args.outdir, "stats_energy.npy"), stats.astype(np.float32), allow_pickle=False)
Example #2
import argparse
import logging
import os

import numpy as np
import yaml
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Project-local helper; import path assumed from the TensorFlowTTS layout.
from tensorflow_tts.datasets import MelDataset


def main():
    """Run preprocessing process."""
    parser = argparse.ArgumentParser(
        description=
        "Normalize dumped raw features (See detail in tensorflow_tts/bin/normalize.py)."
    )
    parser.add_argument(
        "--rootdir",
        default=None,
        type=str,
        required=True,
        help="directory including feature files to be normalized. ")
    parser.add_argument("--outdir",
                        type=str,
                        required=True,
                        help="directory to dump normalized feature files.")
    parser.add_argument("--stats",
                        type=str,
                        required=True,
                        help="statistics file.")
    parser.add_argument("--config",
                        type=str,
                        required=True,
                        help="yaml format configuration file.")
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)")
    args = parser.parse_args()

    # set logger
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format=
            "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            format=
            "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    else:
        logging.basicConfig(
            level=logging.WARN,
            format=
            "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
        logging.warning('Skip DEBUG/INFO messages')

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))

    # check directory existence
    os.makedirs(args.outdir, exist_ok=True)
    os.makedirs(os.path.join(args.outdir, 'train', 'norm-feats'),
                exist_ok=True)
    os.makedirs(os.path.join(args.outdir, 'valid', 'norm-feats'),
                exist_ok=True)

    # get dataset
    if args.rootdir is not None:
        if config["format"] == "npy":
            mel_query = "*-raw-feats.npy"

            def mel_load_fn(x):
                return np.load(x, allow_pickle=True)
        else:
            raise ValueError("support only npy format.")

        dataset = MelDataset(
            args.rootdir,
            mel_query=mel_query,
            mel_load_fn=mel_load_fn,
            return_utt_id=True,
        ).create(batch_size=1)

    # restore scaler
    scaler = StandardScaler()
    if config["format"] == "npy":
        stats = np.load(args.stats)
        scaler.mean_ = stats[0]
        scaler.scale_ = stats[1]
    else:
        raise ValueError("Support only npy format")

    # load train/valid utt_ids
    train_utt_ids = np.load(os.path.join(args.rootdir, 'train_utt_ids.npy'))
    valid_utt_ids = np.load(os.path.join(args.rootdir, 'valid_utt_ids.npy'))

    # process each file
    for items in tqdm(dataset):
        utt_id, mel, _ = items

        # convert to numpy
        utt_id = utt_id[0].numpy().decode("utf-8")
        mel = mel[0].numpy()

        # normalize
        mel = scaler.transform(mel)

        # save
        if config["format"] == "npy":
            if utt_id in train_utt_ids:
                subdir = "train"
            elif utt_id in valid_utt_ids:
                subdir = "valid"
            else:
                raise ValueError(
                    f"{utt_id} is in neither train_utt_ids nor valid_utt_ids.")
            np.save(os.path.join(args.outdir, subdir, "norm-feats",
                                 f"{utt_id}-norm-feats.npy"),
                    mel.astype(np.float32),
                    allow_pickle=False)
        else:
            raise ValueError("support only npy format.")
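
The natural counterpart at synthesis time is de-normalizing model output back to the raw feature scale. A minimal sketch, assuming the same stats layout as above (file paths are hypothetical):

import numpy as np
from sklearn.preprocessing import StandardScaler

# Rebuild the scaler from the saved statistics, exactly as the script above does.
scaler = StandardScaler()
stats = np.load("dump/stats.npy")  # hypothetical path
scaler.mean_, scaler.scale_ = stats[0], stats[1]

# Map a normalized mel [T, num_mels] back to the raw feature scale.
norm_mel = np.load("dump/train/norm-feats/some_utt-norm-feats.npy")  # hypothetical
raw_mel = scaler.inverse_transform(norm_mel)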
Example #3
import argparse
import logging
import os

import numpy as np
import soundfile as sf
import yaml
from tqdm import tqdm

# Project-local classes; import paths assumed from the TensorFlowTTS layout.
from tensorflow_tts.configs import MelGANGeneratorConfig
from tensorflow_tts.datasets import MelDataset
from tensorflow_tts.models import TFMelGANGenerator


def main():
    """Run melgan decoding from folder."""
    parser = argparse.ArgumentParser(
        description="Generate Audio from melspectrogram with trained melgan "
        "(See detail in example/melgan/decode_melgan.py)."
    )
    parser.add_argument(
        "--rootdir",
        default=None,
        type=str,
        required=True,
        help="directory including ids/durations files.",
    )
    parser.add_argument(
        "--outdir", type=str, required=True, help="directory to save generated speech."
    )
    parser.add_argument(
        "--checkpoint", type=str, required=True, help="checkpoint file to be loaded."
    )
    parser.add_argument(
        "--use-norm", type=int, default=1, help="Use norm or raw melspectrogram."
    )
    parser.add_argument("--batch-size", type=int, default=8, help="batch_size.")
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="yaml format configuration file.",
    )
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)",
    )
    args = parser.parse_args()

    # set logger
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    else:
        logging.basicConfig(
            level=logging.WARN,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
        logging.warning("Skip DEBUG/INFO messages")

    # check directory existence
    os.makedirs(args.outdir, exist_ok=True)

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))

    if config["format"] == "npy":
        mel_query = "*-norm-feats.npy" if args.use_norm == 1 else "*-raw-feats.npy"
        mel_load_fn = np.load
    else:
        raise ValueError("Only npy is supported.")

    # define data-loader
    dataset = MelDataset(
        root_dir=args.rootdir,
        mel_query=mel_query,
        mel_load_fn=mel_load_fn,
    )
    dataset = dataset.create(batch_size=args.batch_size)

    # define model and load checkpoint
    melgan = TFMelGANGenerator(
        config=MelGANGeneratorConfig(**config["generator_params"]), name="melgan"
    )
    melgan._build()
    melgan.load_weights(args.checkpoint)

    for data in tqdm(dataset, desc="[Decoding]"):
        utt_ids, mels, mel_lengths = data["utt_ids"], data["mels"], data["mel_lengths"]
        # melgan inference.
        generated_audios = melgan(mels)

        # convert to numpy.
        generated_audios = generated_audios.numpy()  # [B, T]

        # save to outdir
        for i, audio in enumerate(generated_audios):
            utt_id = utt_ids[i].numpy().decode("utf-8")
            sf.write(
                os.path.join(args.outdir, f"{utt_id}.wav"),
                audio[: mel_lengths[i].numpy() * config["hop_size"]],
                config["sampling_rate"],
                "PCM_16",
            )
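
The slice audio[: mel_lengths[i].numpy() * config["hop_size"]] trims any padding the vocoder produced: each mel frame corresponds to hop_size waveform samples. A quick sanity check with illustrative values (hop_size=256 and 22050 Hz are common LJSpeech settings, not values read from any config here):

# Each mel frame covers hop_size waveform samples.
num_frames, hop_size, sampling_rate = 812, 256, 22050
num_samples = num_frames * hop_size          # 207872 samples
duration_sec = num_samples / sampling_rate   # ~9.43 seconds
print(num_samples, round(duration_sec, 2))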