def main():
    """Extract feature statistics for normalization.

    Fits one speaker-independent ``Scaler`` per feature extension (mlfb,
    lcf0, optionally mcep and per-window-type mlfb variants) over every
    utterance of the given phase, plus one speaker-dependent lcf0 scaler
    per speaker, and dumps the resulting dict of fitted sklearn scalers
    to ``<featdir>/<label>/scaler.pkl``.
    """
    dcp = "Extract feature statistics"
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--phase", type=str, default=None, help="phase")
    # fix: "ymal" -> "yaml" typo in user-facing help text
    parser.add_argument("--conf", type=str, help="yaml file for network parameters")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    args = parser.parse_args()

    conf = load_yaml(args.conf)
    scp = open_scpdir(Path(args.scpdir) / args.phase)
    featdir = Path(args.featdir) / conf["feature"]["label"]
    featsscp = featdir / args.phase / "feats.scp"
    scp["feats"] = open_featsscp(featsscp)
    scaler = {}

    # speaker independent scaler extraction
    feats = ["mlfb", "lcf0"]
    # NOTE: need to be improved, require smart way
    # (mcep statistics are skipped for 8 kHz corpora)
    if conf["feature"]["fs"] != 8000:
        feats.append("mcep")

    # also collect mel-filterbank statistics for every non-hann window type
    for win_type in conf["feature"]["window_types"]:
        if win_type != "hann":
            feats.append(f"mlfb_{win_type}")

    for ext in feats:
        s = Scaler()
        s.fit(list(scp["feats"].values()), ext=ext)
        logging.info("# of samples for {}: {}".format(ext, s.ss.n_samples_seen_))
        scaler[ext] = s.ss

    # speaker dependent statistics extraction (lcf0 only)
    for spkr in scp["spkrs"]:
        file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]]
        s = Scaler()
        s.fit(file_lists_sd, ext="lcf0")
        logging.info(
            "# of samples {} of {}: {} samples".format(
                "lcf0", spkr, s.ss.n_samples_seen_
            )
        )
        scaler[spkr] = {"lcf0": s.ss}

    pklf = featdir / "scaler.pkl"
    joblib.dump(scaler, str(pklf))
    logging.info("Save scaler to {}".format(pklf))
# Ejemplo n.º 2
def main():
    """Extract feature statistics for normalization (expdir variant).

    Fits one speaker-independent ``Scaler`` per feature extension
    (mlfb, mcep, lcf0) over every utterance of the given phase, plus a
    speaker-dependent lcf0 scaler per speaker, and dumps the dict of
    fitted sklearn scalers to ``<expdir>/<label>_scaler.pkl``.
    """
    dcp = "Extract feature statistics"
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    parser.add_argument("--phase", type=str, default=None, help="phase")
    # fix: "ymal" -> "yaml" typo in user-facing help text
    parser.add_argument("--conf",
                        type=str,
                        help="yaml file for network parameters")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()

    conf = load_yaml(args.conf)
    scp = open_scpdir(Path(args.scpdir) / args.phase)
    featsscp = Path(
        args.featdir) / conf["feature"]["label"] / args.phase / "feats.scp"
    scp["feats"] = open_featsscp(featsscp)
    expdir = Path(args.expdir)
    scaler = {}

    # speaker independent scaler extraction
    feats = ["mlfb", "mcep", "lcf0"]
    for ext in feats:
        s = Scaler()
        s.fit(list(scp["feats"].values()), ext=ext)
        logging.info("# of samples for {}: {}".format(ext,
                                                      s.ss.n_samples_seen_))
        scaler[ext] = s.ss

    # speaker dependent statistics extraction (lcf0 only)
    for spkr in scp["spkrs"]:
        file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]]
        s = Scaler()
        s.fit(file_lists_sd, ext="lcf0")
        logging.info("# of samples {} of {}: {} samples".format(
            "lcf0", spkr, s.ss.n_samples_seen_))
        scaler[spkr] = {"lcf0": s.ss}

    pklf = str(expdir / "{}_scaler.pkl".format(conf["feature"]["label"]))
    joblib.dump(scaler, pklf)
    logging.info("Save scaler to {}".format(pklf))
# Ejemplo n.º 3
def main():
    """Train, evaluate, or reconstruct with a VQ-VAE model.

    Parses options, loads the yaml config and scp/feats files, restores a
    checkpoint when requested (or auto-picks the latest one for
    eval/reconstruction), then hands everything to ``TrainerWrapper``.

    Raises:
        AssertionError: when no CUDA device is available.
        FileNotFoundError: when eval/reconstruction is requested but no
            checkpoint *.pkl exists in the experiment directory.
    """
    # options for python
    description = "Train VQ-VAE model"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--flag",
                        help='flag ["train", "eval", "reconstruction"]')
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    # fix: "ymal" -> "yaml" typo in user-facing help text
    parser.add_argument("--conf",
                        type=str,
                        help="yaml file for network parameters")
    parser.add_argument("--checkpoint", type=str, default=None, help="Resume")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument("--featsscp",
                        type=str,
                        help="specify feats.scp not scpdir")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    assert str(device) == "cuda", "ERROR: Do not accept CPU training."

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # load scp
    scp = {}
    featdir = Path(args.featdir) / conf["feature"]["label"]
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(Path(args.scpdir) / phase)
        scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp")
    # shell wrappers pass the literal string "None" to disable this override
    if args.flag == "eval" and args.featsscp != "None":
        logging.info("Load feats.scp from {}".format(args.featsscp))
        scp[args.flag]["feats"] = open_featsscp(args.featsscp)

    expdir = Path(args.expdir) / Path(args.conf).stem
    expdir.mkdir(exist_ok=True, parents=True)
    spkr_size = len(scp["train"]["spkrs"])

    # load model
    model = get_model(conf, spkr_size, device)
    resume = 0
    # fix: --checkpoint defaults to the Python None object, but the original
    # only compared against the string "None", so an omitted flag passed
    # None straight into load_checkpoint
    if args.checkpoint not in (None, "None"):
        model, resume = load_checkpoint(model, args.checkpoint)
    elif args.flag in ["reconstruction", "eval"]:
        import re

        pkls = list(expdir.glob("*.pkl"))
        # fix: guard against an empty glob (the original crashed on max([]))
        if not pkls:
            raise FileNotFoundError(
                "No checkpoint (*.pkl) found in {}".format(expdir))
        # pick the checkpoint whose filename embeds the largest step number;
        # glob() order is arbitrary and substring matching is fragile
        checkpoint = str(
            max(pkls, key=lambda p: int(re.findall("[0-9]+", p.stem)[0])))
        model, resume = load_checkpoint(model, checkpoint)

    # load others
    scaler = joblib.load(
        Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"]))
    optimizer = get_optimizer(conf, model)
    criterion = get_criterion(conf)
    dataloader = get_dataloader(conf,
                                scp,
                                scaler,
                                n_jobs=args.n_jobs,
                                flag=args.flag)
    scheduler = get_scheduler(conf, optimizer)
    writer = {
        "train":
        SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name),
        "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name),
    }

    # keyword arguments shared by every trainer implementation
    ka = {
        "model": model,
        "optimizer": optimizer,
        "criterion": criterion,
        "dataloader": dataloader,
        "writer": writer,
        "expdir": expdir,
        "conf": conf,
        "feat_conf": conf["feature"],
        "scheduler": scheduler,
        "device": device,
        "scaler": scaler,
        "resume": resume,
    }
    trainer = TrainerWrapper(conf["trainer_type"], **ka)
    trainer.run(flag=args.flag)
# Ejemplo n.º 4
def main():
    """Calculate mel-cepstral distortion (MCD) between converted and
    ground-truth utterances.

    Runs ``calculate`` in parallel over every converted file, then writes
    the mean MCD per source-target speaker pair to ``--out`` (or stdout).
    """
    parser = argparse.ArgumentParser(description="calculate MCD.")
    parser.add_argument("--conf",
                        type=str,
                        required=True,
                        help="Configuration file")
    parser.add_argument("--spkr_conf",
                        type=str,
                        required=True,
                        help="Speaker configuration file")
    parser.add_argument(
        "--featdir",
        type=str,
        required=True,
        help="Root directory of ground truth feature h5 files",
    )
    parser.add_argument("--outwavdir",
                        type=str,
                        required=True,
                        help="Converted waveform directory")
    parser.add_argument(
        "--out",
        "-O",
        type=str,
        help="The output filename. "
        "If omitted, then output to sys.stdout",
    )
    parser.add_argument("--n_jobs",
                        default=40,
                        type=int,
                        help="number of parallel jobs")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    spkr_conf = load_yaml(args.spkr_conf)

    # load converted files. If mcep, use h5; else, waveform
    pattern = "*.h5" if conf["feat_type"] == "mcep" else "*.wav"
    converted_files = sorted(Path(args.outwavdir).glob(pattern))
    logging.info(f"number of utterances = {len(converted_files)}")

    # load ground truth scp
    featdir = Path(args.featdir) / conf["feature"]["label"]
    gt_feats = open_featsscp(featdir / "eval" / "feats.scp")

    if args.out is None:
        out = sys.stdout
    else:
        out = open(args.out, "w", encoding="utf-8")

    try:
        MCD_list = Parallel(args.n_jobs)([
            delayed(calculate)(cv_path, gt_feats, conf, spkr_conf)
            for cv_path in converted_files
        ])

        # summarize by source-target speaker pair
        pairwise_MCD = {}
        for k, v in MCD_list:
            # assumes keys look like "<orgspk>-<tarspk>-<uttid>" with no
            # extra hyphens inside speaker names -- TODO confirm upstream
            orgspk, tarspk, _ = k.split("-")
            pairwise_MCD.setdefault(orgspk + "-" + tarspk, []).append(v)

        for pair in sorted(pairwise_MCD.keys()):
            mcd_list = pairwise_MCD[pair]
            mean_mcd = float(sum(mcd_list) / len(mcd_list))
            out.write(f"{pair} {mean_mcd:.3f}\n")
    finally:
        # fix: the original never closed the output file handle
        if out is not sys.stdout:
            out.close()
# Ejemplo n.º 5
def main():
    """Train, evaluate, or reconstruct with a VQ-VAE model (explicit
    trainer dispatch variant).

    Parses options, loads config and scp/feats files, restores a
    checkpoint when requested (or auto-picks the latest one for
    eval/reconstruction), then builds the trainer matching
    ``conf["trainer_type"]`` and runs it.

    Raises:
        AssertionError: when no CUDA device is available.
        FileNotFoundError: when eval/reconstruction is requested but no
            checkpoint *.pkl exists in the experiment directory.
        NotImplementedError: for an unsupported ``trainer_type``.
    """
    # options for python
    description = "Train VQ-VAE model"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--flag",
        type=str,
        default="train",
        help='Flag for ["train", "eval", "reconstruction"]',
    )
    parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs")
    # fix: "ymal" -> "yaml" typo in user-facing help text
    parser.add_argument("--conf",
                        type=str,
                        help="yaml file for network parameters")
    parser.add_argument("--checkpoint",
                        type=str,
                        default=None,
                        help="Resume model for re-training")
    parser.add_argument("--scpdir", type=str, help="scp directory")
    parser.add_argument("--featdir", type=str, help="output feature directory")
    parser.add_argument(
        "--featsscp",
        type=str,
        help="specify feats.scp instead of using scp directory")
    parser.add_argument("--expdir", type=str, help="exp directory")
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    assert str(device) == "cuda", "ERROR: Do not accept CPU training."

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # load scp
    scp = {}
    featdir = Path(args.featdir) / conf["feature"]["label"]
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(Path(args.scpdir) / phase)
        scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp")
    # shell wrappers pass the literal string "None" to disable this override
    if args.flag == "eval" and args.featsscp != "None":
        logging.info("Load feats.scp from {}".format(args.featsscp))
        scp[args.flag]["feats"] = open_featsscp(args.featsscp)

    expdir = Path(args.expdir) / Path(args.conf).stem
    expdir.mkdir(exist_ok=True, parents=True)
    spkr_size = len(scp["train"]["spkrs"])

    # load model
    model = get_model(conf, spkr_size, device)
    resume = 0
    # fix: --checkpoint defaults to the Python None object, but the original
    # only compared against the string "None", so an omitted flag passed
    # None straight into load_checkpoint
    if args.checkpoint not in (None, "None"):
        model, resume = load_checkpoint(model, args.checkpoint)
    elif args.flag in ["reconstruction", "eval"]:
        import re

        pkls = list(expdir.glob("*.pkl"))
        if not pkls:
            raise FileNotFoundError(
                "No checkpoint (*.pkl) found in {}".format(expdir))
        # fix: the original took glob(...)[-1], but glob() order is
        # arbitrary; pick the checkpoint with the largest embedded step
        checkpoint = max(
            pkls, key=lambda p: int(re.findall("[0-9]+", p.stem)[0]))
        model, resume = load_checkpoint(model, checkpoint)

    # load others
    scaler = joblib.load(
        Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"]))
    optimizer = get_optimizer(conf, model)
    criterion = get_criterion(conf)
    dataloader = get_dataloader(conf,
                                scp,
                                scaler,
                                n_jobs=args.n_jobs,
                                flag=args.flag)
    scheduler = get_scheduler(conf, optimizer)
    writer = {
        "train":
        SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name),
        "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name),
    }

    # keyword arguments shared by every trainer implementation
    ka = {
        "model": model,
        "optimizer": optimizer,
        "criterion": criterion,
        "dataloader": dataloader,
        "writer": writer,
        "expdir": expdir,
        "conf": conf,
        "feat_conf": conf["feature"],
        "scheduler": scheduler,
        "device": device,
        "scaler": scaler,
        "resume": resume,
    }

    # dispatch table instead of an if/elif chain
    trainers = {
        "vqvae": VQVAETrainer,
        "lsgan": LSGANTrainer,
        "cycle": CycleVQVAETrainer,
        "cyclegan": CycleGANTrainer,
    }
    trainer_cls = trainers.get(conf["trainer_type"])
    if trainer_cls is None:
        raise NotImplementedError(
            "conf['trainer_type']: {} is not supported.".format(
                conf["trainer_type"]))
    trainer = trainer_cls(**ka)
    trainer.run(flag=args.flag)