def test_feature():
    conf = load_yaml(datadir / "mlfb_vqvae.yml")
    spkr_conf = load_yaml(datadir / "spkr.yml")
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"], gl_flag=True)
    feat.analyze(datadir / "SF1_10001.wav")
    (datadir / "SF1_10001.h5").unlink()
    (datadir / "SF1_10001_anasyn.wav").unlink()

def test_feature():
    conf = load_yaml(ymlf)
    spkr_conf = load_yaml(spkrymlf)
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"])
    feat.analyze(
        datadir / "SF1_10001.wav",
        synth_flag=True,
    )
    (datadir / "SF1_10001.h5").unlink()
    (datadir / "SF1_10001_anasyn.wav").unlink()

def main(): dcp = "Extract aoucstic features" parser = argparse.ArgumentParser(description=dcp) parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs") parser.add_argument("--phase", type=str, default=None, help="phase") parser.add_argument("--n_decode_samples", type=int, default=3, help="# decode samples") parser.add_argument("--conf", type=str, help="ymal file for network parameters") parser.add_argument("--spkr_yml", type=str, help="yml file for speaker params") parser.add_argument("--scpdir", type=str, help="scp directory") parser.add_argument("--featdir", type=str, help="output feature directory") args = parser.parse_args() conf = load_yaml(args.conf) spkr_conf = load_yaml(args.spkr_yml) scp = open_scpdir(Path(args.scpdir) / args.phase) featdir = Path(args.featdir) / conf["feature"]["label"] / args.phase featsscp = featdir / "feats.scp" if featsscp.exists(): featsscp.unlink() for spkr in scp["spkrs"]: logging.info("extract feature for {}".format(spkr)) wavs = [scp["wav"][uid] for uid in scp["spk2utt"][spkr]] (featdir / spkr).mkdir(parents=True, exist_ok=True) feat = Feature(featdir / spkr, conf["feature"], spkr_conf[spkr]) # create feats.scp with open(featsscp, "a") as fp: for uid in scp["spk2utt"][spkr]: wavf = scp["wav"][uid] h5f = str(featdir / spkr / (Path(wavf).stem + ".h5")) fp.write("{} {}\n".format(uid, h5f)) # feature extraction with GliffinLim Parallel(n_jobs=args.n_jobs)([ delayed(feat.analyze)(wavf, synth_flag=True) for wavf in wavs[:args.n_decode_samples] ]) # feature extraction without GliffinLim Parallel(n_jobs=args.n_jobs)([ delayed(feat.analyze)(wavf, synth_flag=False) for wavf in wavs[args.n_decode_samples:] ])
def test_feature_8k():
    conf = load_yaml(ymlf)
    conf["feature"].update(
        {
            "fs": 8000,
            "fftl": 256,
            "fmin": 80,
            "fmax": 3800,
            "hop_size": 80,
            "mlfb_dim": 80,
        }
    )
    spkr_conf = load_yaml(datadir / "spkr.yml")
    feat = Feature(datadir, conf["feature"], spkr_conf["SF1"])
    feat.analyze(datadir / "SF1_10001_8k.wav", synth_flag=False)
    (datadir / "SF1_10001_8k.h5").unlink()

def test_dataset(decoder_f0, use_mcep, use_raw):
    conf = load_yaml(ymlf)
    conf["decoder_f0"] = decoder_f0
    conf["receptive_size"] = 128
    if use_mcep:
        conf["input_feat_type"] = "mcep"
        conf["output_feat_type"] = "mcep"
        conf["ignore_scaler"] = ["mcep", "raw"]
    if use_raw:
        conf["use_raw"] = True
        conf["input_feat_type"] = "mlfb"
        conf["ignore_scaler"] = ["raw"]

    scp = {}
    scpdir = datadir / "scpdir"
    for phase in ["train", "dev", "eval"]:
        scp[phase] = open_scpdir(scpdir / phase)
        scp[phase]["feats"] = {"01": h5f, "02": h5f, "03": h5f}

    dataset = BaseDataset(conf, scp, phase="train", scaler=scaler)
    dataloader = DataLoader(dataset, batch_size=12, shuffle=True, num_workers=1)

    # iterate once over the dataloader to confirm batches are produced without error
    for i, batch in enumerate(dataloader):
        for k, v in batch.items():
            if isinstance(v, torch.Tensor):
                pass  # print(k, v.type(), v.size())
            else:
                pass

def main():
    parser = argparse.ArgumentParser(
        description="Convert filter banks to waveform using Griffin-Lim algorithm",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--conf", type=str, required=True, help="Configuration file")
    parser.add_argument(
        "--rootdir",
        type=str,
        required=True,
        help="Root directory of filter bank h5 files",
    )
    parser.add_argument("--outdir", type=str, required=True, help="Output directory")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # find h5 files
    feats_files = sorted(list(Path(args.rootdir).glob("*.h5")))
    feats = {
        Path(args.outdir) / (filename.stem + ".wav"): read_feature(filename, "feats")
        for filename in feats_files
    }

    # Main Griffin-Lim algorithm
    Parallel(n_jobs=30)(
        [
            delayed(mlfb2wavf)(
                feats[wavf],
                wavf,
                fs=conf["feature"]["fs"],
                n_mels=conf["feature"]["mlfb_dim"],
                fftl=conf["feature"]["fftl"],
                hop_size=conf["feature"]["hop_size"],
                plot=False,
            )
            for wavf in list(feats.keys())
        ]
    )

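
# Example invocation of the Griffin-Lim reconstruction above. Paths and the script
# name are illustrative assumptions; the flags are those defined above.
#   python griffin_lim.py --conf conf/mlfb_vqvae.yml \
#       --rootdir exp/feature/mlfb/eval --outdir exp/griffin_lim_wav
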
def main(): dcp = "Extract feature statistics" parser = argparse.ArgumentParser(description=dcp) parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs") parser.add_argument("--phase", type=str, default=None, help="phase") parser.add_argument("--conf", type=str, help="ymal file for network parameters") parser.add_argument("--scpdir", type=str, help="scp directory") parser.add_argument("--featdir", type=str, help="output feature directory") args = parser.parse_args() conf = load_yaml(args.conf) scp = open_scpdir(Path(args.scpdir) / args.phase) featdir = Path(args.featdir) / conf["feature"]["label"] featsscp = featdir / args.phase / "feats.scp" scp["feats"] = open_featsscp(featsscp) scaler = {} # speaker independent scaler extraction feats = ["mlfb", "lcf0"] # NOTE: need to be improved, require smart way if conf["feature"]["fs"] != 8000: feats.append("mcep") for win_type in conf["feature"]["window_types"]: if win_type != "hann": feats += [f"mlfb_{win_type}"] for ext in feats: s = Scaler() s.fit(list(scp["feats"].values()), ext=ext) logging.info("# of samples for {}: {}".format(ext, s.ss.n_samples_seen_)) scaler[ext] = s.ss # speaker dependent statistics extraction for spkr in scp["spkrs"]: file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]] s = Scaler() s.fit(file_lists_sd, ext="lcf0") logging.info( "# of samples {} of {}: {} samples".format( "lcf0", spkr, s.ss.n_samples_seen_ ) ) scaler[spkr] = {"lcf0": s.ss} pklf = featdir / "scaler.pkl" joblib.dump(scaler, str(pklf)) logging.info("Save scaler to {}".format(pklf))
def main(): dcp = "Extract feature statistics" parser = argparse.ArgumentParser(description=dcp) parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs") parser.add_argument("--phase", type=str, default=None, help="phase") parser.add_argument("--conf", type=str, help="ymal file for network parameters") parser.add_argument("--scpdir", type=str, help="scp directory") parser.add_argument("--featdir", type=str, help="output feature directory") parser.add_argument("--expdir", type=str, help="exp directory") args = parser.parse_args() conf = load_yaml(args.conf) scp = open_scpdir(Path(args.scpdir) / args.phase) featsscp = Path( args.featdir) / conf["feature"]["label"] / args.phase / "feats.scp" scp["feats"] = open_featsscp(featsscp) expdir = Path(args.expdir) scaler = {} # speaker independent scaler extraction feats = ["mlfb", "mcep", "lcf0"] for ext in feats: s = Scaler() s.fit(list(scp["feats"].values()), ext=ext) logging.info("# of samples for {}: {}".format(ext, s.ss.n_samples_seen_)) scaler[ext] = s.ss # speaker dependent statistics extraction for spkr in scp["spkrs"]: file_lists_sd = [scp["feats"][uid] for uid in scp["spk2utt"][spkr]] s = Scaler() s.fit(file_lists_sd, ext="lcf0") logging.info("# of samples {} of {}: {} samples".format( "lcf0", spkr, s.ss.n_samples_seen_)) scaler[spkr] = {"lcf0": s.ss} pklf = str(expdir / "{}_scaler.pkl".format(conf["feature"]["label"])) joblib.dump(scaler, pklf) logging.info("Save scaler to {}".format(pklf))
from pathlib import Path

import numpy as np
import soundfile as sf
import torch

from crank.net.module.mlfb import LogMelFilterBankLayer
from crank.net.module.sinc_conv import SincConvPreprocessingLayer
from crank.utils import load_yaml

B, T = 1, 65536
datadir = Path(__file__).parent / "data"
ymlf = datadir / "mlfb_vqvae_22050.yml"
spkrymlf = datadir / "spkr.yml"
conf = load_yaml(ymlf)
wavf = datadir / "SF1_10001.wav"


def test_sincconv():
    sinc_conv = SincConvPreprocessingLayer(
        in_channels=1,
        sincconv_channels=32,
        sincconv_kernel_size=65,
        out_channels=80,
        kernel_sizes=[4, 4, 4, 2],
    )
    x, fs = sf.read(str(wavf))
    x = np.array(x, dtype=np.float32)
    x = torch.from_numpy(x).unsqueeze(0).unsqueeze(-1)
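    # The original snippet stops after shaping the input; the forward call below is
    # an assumed completion (input of shape (B, T, 1)) so the layer is actually run.
    y = sinc_conv(x)
    assert isinstance(y, torch.Tensor)
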
def main(): # options for python description = "Train VQ-VAE model" parser = argparse.ArgumentParser(description=description) parser.add_argument("--flag", help='flag ["train", "eval", "reconstruction"]') parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs") parser.add_argument("--conf", type=str, help="ymal file for network parameters") parser.add_argument("--checkpoint", type=str, default=None, help="Resume") parser.add_argument("--scpdir", type=str, help="scp directory") parser.add_argument("--featdir", type=str, help="output feature directory") parser.add_argument("--featsscp", type=str, help="specify feats.scp not scpdir") parser.add_argument("--expdir", type=str, help="exp directory") args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") assert str(device) == "cuda", "ERROR: Do not accept CPU training." # load configure files conf = load_yaml(args.conf) for k, v in conf.items(): logging.info("{}: {}".format(k, v)) # load scp scp = {} featdir = Path(args.featdir) / conf["feature"]["label"] for phase in ["train", "dev", "eval"]: scp[phase] = open_scpdir(Path(args.scpdir) / phase) scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp") if args.flag == "eval" and args.featsscp != "None": logging.info("Load feats.scp from {}".format(args.featsscp)) scp[args.flag]["feats"] = open_featsscp(args.featsscp) expdir = Path(args.expdir) / Path(args.conf).stem expdir.mkdir(exist_ok=True, parents=True) spkr_size = len(scp["train"]["spkrs"]) # load model model = get_model(conf, spkr_size, device) resume = 0 if args.checkpoint != "None": model, resume = load_checkpoint(model, args.checkpoint) else: if args.flag in ["reconstruction", "eval"]: import re pkls = list(expdir.glob("*.pkl")) steps = [re.findall('[0-9]+', str(p.stem))[0] for p in pkls] max_step = max([int(s) for s in steps]) checkpoint = str([p for p in pkls if str(max_step) in str(p)][0]) model, resume = load_checkpoint(model, checkpoint) # load others scaler = joblib.load( Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"])) optimizer = get_optimizer(conf, model) criterion = get_criterion(conf) dataloader = get_dataloader(conf, scp, scaler, n_jobs=args.n_jobs, flag=args.flag) scheduler = get_scheduler(conf, optimizer) writer = { "train": SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name), "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name), } ka = { "model": model, "optimizer": optimizer, "criterion": criterion, "dataloader": dataloader, "writer": writer, "expdir": expdir, "conf": conf, "feat_conf": conf["feature"], "scheduler": scheduler, "device": device, "scaler": scaler, "resume": resume, } trainer = TrainerWrapper(conf["trainer_type"], **ka) trainer.run(flag=args.flag)
def main(): parser = argparse.ArgumentParser(description="calculate MCD.") parser.add_argument("--conf", type=str, required=True, help="Configuration file") parser.add_argument("--spkr_conf", type=str, required=True, help="Speaker configuration file") parser.add_argument( "--featdir", type=str, required=True, help="Root directory of ground truth feature h5 files", ) parser.add_argument("--outwavdir", type=str, required=True, help="Converted waveform directory") parser.add_argument( "--out", "-O", type=str, help="The output filename. " "If omitted, then output to sys.stdout", ) parser.add_argument("--n_jobs", default=40, type=int, help="number of parallel jobs") args = parser.parse_args() # logging info logging.basicConfig( level=logging.INFO, stream=sys.stdout, format="%(asctime)s (%(module)s:%(lineno)d) " "%(levelname)s: %(message)s", ) # load configure files conf = load_yaml(args.conf) spkr_conf = load_yaml(args.spkr_conf) # load converted files. If mcep, use h5; else, waveform if conf["feat_type"] == "mcep": converted_files = sorted(list(Path(args.outwavdir).glob("*.h5"))) else: converted_files = sorted(list(Path(args.outwavdir).glob("*.wav"))) logging.info(f"number of utterances = {len(converted_files)}") # load ground truth scp featdir = Path(args.featdir) / conf["feature"]["label"] gt_feats = open_featsscp(featdir / "eval" / "feats.scp") if args.out is None: out = sys.stdout else: out = open(args.out, "w", encoding="utf-8") MCD_list = Parallel(args.n_jobs)([ delayed(calculate)(cv_path, gt_feats, conf, spkr_conf) for cv_path in converted_files ]) # summarize by pair pairwise_MCD = {} for k, v in MCD_list: orgspk, tarspk, _ = k.split("-") pair = orgspk + "-" + tarspk if pair not in pairwise_MCD: pairwise_MCD[pair] = [] pairwise_MCD[pair].append(v) for k in sorted(pairwise_MCD.keys()): mcd_list = pairwise_MCD[k] mean_mcd = float(sum(mcd_list) / len(mcd_list)) out.write(f"{k} {mean_mcd:.3f}\n")
def main(): # options for python description = "Train VQ-VAE model" parser = argparse.ArgumentParser(description=description) parser.add_argument( "--flag", type=str, default="train", help='Flag for ["train", "eval", "reconstruction"]', ) parser.add_argument("--n_jobs", type=int, default=-1, help="# of CPUs") parser.add_argument("--conf", type=str, help="ymal file for network parameters") parser.add_argument("--checkpoint", type=str, default=None, help="Resume model for re-training") parser.add_argument("--scpdir", type=str, help="scp directory") parser.add_argument("--featdir", type=str, help="output feature directory") parser.add_argument( "--featsscp", type=str, help="specify feats.scp instead of using scp directory") parser.add_argument("--expdir", type=str, help="exp directory") args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") assert str(device) == "cuda", "ERROR: Do not accept CPU training." # load configure files conf = load_yaml(args.conf) for k, v in conf.items(): logging.info("{}: {}".format(k, v)) # load scp scp = {} featdir = Path(args.featdir) / conf["feature"]["label"] for phase in ["train", "dev", "eval"]: scp[phase] = open_scpdir(Path(args.scpdir) / phase) scp[phase]["feats"] = open_featsscp(featdir / phase / "feats.scp") if args.flag == "eval" and args.featsscp != "None": logging.info("Load feats.scp from {}".format(args.featsscp)) scp[args.flag]["feats"] = open_featsscp(args.featsscp) expdir = Path(args.expdir) / Path(args.conf).stem expdir.mkdir(exist_ok=True, parents=True) spkr_size = len(scp["train"]["spkrs"]) # load model model = get_model(conf, spkr_size, device) resume = 0 if args.checkpoint != "None": model, resume = load_checkpoint(model, args.checkpoint) else: if args.flag in ["reconstruction", "eval"]: checkpoint = list(expdir.glob("*.pkl"))[-1] model, resume = load_checkpoint(model, checkpoint) # load others scaler = joblib.load( Path(args.expdir) / "{}_scaler.pkl".format(conf["feature"]["label"])) optimizer = get_optimizer(conf, model) criterion = get_criterion(conf) dataloader = get_dataloader(conf, scp, scaler, n_jobs=args.n_jobs, flag=args.flag) scheduler = get_scheduler(conf, optimizer) writer = { "train": SummaryWriter(logdir=args.expdir + "/runs/train-" + expdir.name), "dev": SummaryWriter(logdir=args.expdir + "/runs/dev-" + expdir.name), } ka = { "model": model, "optimizer": optimizer, "criterion": criterion, "dataloader": dataloader, "writer": writer, "expdir": expdir, "conf": conf, "feat_conf": conf["feature"], "scheduler": scheduler, "device": device, "scaler": scaler, "resume": resume, } if conf["trainer_type"] == "vqvae": trainer = VQVAETrainer(**ka) elif conf["trainer_type"] == "lsgan": trainer = LSGANTrainer(**ka) elif conf["trainer_type"] == "cycle": trainer = CycleVQVAETrainer(**ka) elif conf["trainer_type"] == "cyclegan": trainer = CycleGANTrainer(**ka) else: raise NotImplementedError( "conf['trainer_type']: {} is not supported.".format( conf["trainer_type"])) trainer.run(flag=args.flag)