def main(): """Run decoding process.""" parser = argparse.ArgumentParser( description= "Decode dumped features with trained Parallel WaveGAN Generator " "(See detail in parallel_wavegan/bin/decode.py).") parser.add_argument("--feats-scp", "--scp", default=None, type=str, help="kaldi-style feats.scp file. " "you need to specify either feats-scp or dumpdir.") parser.add_argument("--dumpdir", default=None, type=str, help="directory including feature files. " "you need to specify either feats-scp or dumpdir.") parser.add_argument("--outdir", type=str, required=True, help="directory to save generated speech.") parser.add_argument("--checkpoint", type=str, required=True, help="checkpoint file to be loaded.") parser.add_argument( "--config", default=None, type=str, help="yaml format configuration file. if not explicitly provided, " "it will be searched in the checkpoint directory. (default=None)") parser.add_argument( "--verbose", type=int, default=1, help="logging level. higher is more logging. (default=1)") args = parser.parse_args() # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") else: logging.basicConfig( level=logging.WARN, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") logging.warning("Skip DEBUG/INFO messages") # check directory existence if not os.path.exists(args.outdir): os.makedirs(args.outdir) # load config if args.config is None: dirname = os.path.dirname(args.checkpoint) args.config = os.path.join(dirname, "config.yml") with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check arguments if (args.feats_scp is not None and args.dumpdir is not None) or \ (args.feats_scp is None and args.dumpdir is None): raise ValueError("Please specify either --dumpdir or --feats-scp.") # get dataset if args.dumpdir is not None: if config["format"] == "hdf5": mel_query = "*.h5" mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": mel_query = "*-feats.npy" mel_load_fn = np.load else: raise ValueError("Support only hdf5 or npy format.") dataset = MelDataset( args.dumpdir, mel_query=mel_query, mel_load_fn=mel_load_fn, return_utt_id=True, ) else: dataset = MelSCPDataset( feats_scp=args.feats_scp, return_utt_id=True, ) logging.info(f"The number of features to be decoded = {len(dataset)}.") # setup model if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") model = load_model(args.checkpoint, config) logging.info(f"Loaded model parameters from {args.checkpoint}.") model.remove_weight_norm() model = model.eval().to(device) # start generation total_rtf = 0.0 with torch.no_grad(), tqdm(dataset, desc="[decode]") as pbar: for idx, (utt_id, c) in enumerate(pbar, 1): # generate c = torch.tensor(c, dtype=torch.float).to(device) start = time.time() y = model.inference(c).view(-1) rtf = (time.time() - start) / (len(y) / config["sampling_rate"]) pbar.set_postfix({"RTF": rtf}) total_rtf += rtf # save as PCM 16 bit wav file sf.write(os.path.join(config["outdir"], f"{utt_id}_gen.wav"), y.cpu().numpy(), config["sampling_rate"], "PCM_16") # report average RTF logging.info( f"Finished generation of {idx} utterances (RTF = {total_rtf / idx:.03f})." )
def main(): """Run decoding process.""" parser = argparse.ArgumentParser( description="Decode dumped features with trained Parallel WaveGAN Generator.") parser.add_argument("--scp", default=None, type=str, help="Kaldi-style feats.scp file.") parser.add_argument("--dumpdir", default=None, type=str, help="Directory including feature files.") parser.add_argument("--outdir", default=None, type=str, required=True, help="Direcotry to save generated speech.") parser.add_argument("--checkpoint", default=None, type=str, required=True, help="Checkpoint file.") parser.add_argument("--config", default=None, type=str, help="Yaml format configuration file.") parser.add_argument("--verbose", type=int, default=1, help="logging level (higher is more logging)") args = parser.parse_args() # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") else: logging.basicConfig( level=logging.WARN, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") logging.warning("skip DEBUG/INFO messages") # check direcotry existence if not os.path.exists(args.outdir): os.makedirs(args.outdir) # load config if args.config is None: dirname = os.path.dirname(args.checkpoint) args.config = os.path.join(dirname, "config.yml") with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check arguments if (args.scp is not None and args.dumpdir is not None) or \ (args.scp is None and args.dumpdir is None): raise ValueError("Please specify either dumpdir or scp.") # get dataset if args.scp is None: if config["format"] == "hdf5": mel_query = "*.h5" mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": mel_query = "*-feats.npy" mel_load_fn = np.load else: raise ValueError("support only hdf5 or npy format.") dataset = MelDataset( args.dumpdir, mel_query=mel_query, mel_load_fn=mel_load_fn, return_filename=True) logging.info(f"the number of features to be decoded = {len(dataset)}.") else: dataset = kaldiio.ReadHelper(f"scp:{args.scp}") logging.info(f"the feature loaded from {args.scp}.") # setup if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") model = ParallelWaveGANGenerator(**config["generator_params"]) model.load_state_dict(torch.load(args.checkpoint, map_location="cpu")["model"]["generator"]) model.remove_weight_norm() model = model.eval().to(device) logging.info(f"loaded model parameters from {args.checkpoint}.") # start generation pad_size = (config["generator_params"]["aux_context_window"], config["generator_params"]["aux_context_window"]) total_rtf = 0.0 with torch.no_grad(), tqdm(dataset, desc="[decode]") as pbar: for idx, (feat_path, c) in enumerate(pbar, 1): # generate each utterance z = torch.randn(1, 1, c.shape[0] * config["hop_size"]).to(device) c = np.pad(c, (pad_size, (0, 0)), "edge") c = torch.FloatTensor(c).unsqueeze(0).transpose(2, 1).to(device) start = time.time() y = model(z, c).view(-1).cpu().numpy() rtf = (time.time() - start) / (len(y) / config["sampling_rate"]) pbar.set_postfix({"RTF": rtf}) total_rtf += rtf # save as PCM 16 bit wav file utt_id = os.path.splitext(os.path.basename(feat_path))[0] sf.write(os.path.join(config["outdir"], f"{utt_id}_gen.wav"), y, config["sampling_rate"], "PCM_16") # report average RTF logging.info(f"finished generation of {idx} 
utterances (RTF = {total_rtf / idx:.03f}).")
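
# Minimal, self-contained sketch of the input shaping used in the loop above.
# All values here are illustrative stand-ins, not taken from a real config:
import numpy as np
import torch

hop_size, aux_context_window, num_mels, num_frames = 256, 2, 80, 100
c = np.random.randn(num_frames, num_mels).astype(np.float32)  # stand-in mel
z = torch.randn(1, 1, num_frames * hop_size)  # noise matching the output length
pad_size = (aux_context_window, aux_context_window)
c = np.pad(c, (pad_size, (0, 0)), "edge")  # replicate edge frames for the context window
c = torch.FloatTensor(c).unsqueeze(0).transpose(2, 1)  # -> (1, num_mels, frames + 2 * window)
print(z.shape, c.shape)  # torch.Size([1, 1, 25600]) torch.Size([1, 80, 104])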

import logging
import os
import time

import numpy as np
import soundfile as sf
import torch
import yaml
from tqdm import tqdm

import parallel_wavegan.models
from parallel_wavegan.datasets import MelDataset
from parallel_wavegan.datasets import MelSCPDataset
from parallel_wavegan.utils import read_hdf5


def make_wav(args):
    """Run decoding process."""
    # set logger
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    else:
        logging.basicConfig(
            level=logging.WARN,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
        logging.warning("Skip DEBUG/INFO messages")

    # check directory existence
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # load config
    if args.config is None:
        dirname = os.path.dirname(args.checkpoint)
        args.config = os.path.join(dirname, "config.yml")
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))

    # check arguments
    if (args.feats_scp is not None and args.dumpdir is not None) or \
            (args.feats_scp is None and args.dumpdir is None):
        raise ValueError("Please specify either --dumpdir or --feats-scp.")

    # get dataset
    if args.dumpdir is not None:
        if config["format"] == "hdf5":
            mel_query = "*.h5"
            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
        elif config["format"] == "npy":
            mel_query = "*-feats.npy"
            mel_load_fn = np.load
        else:
            raise ValueError("support only hdf5 or npy format.")
        dataset = MelDataset(
            args.dumpdir,
            mel_query=mel_query,
            mel_load_fn=mel_load_fn,
            return_utt_id=True,
        )
    else:
        dataset = MelSCPDataset(
            feats_scp=args.feats_scp,
            return_utt_id=True,
        )
    logging.info(f"The number of features to be decoded = {len(dataset)}.")

    # setup
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model_class = getattr(
        parallel_wavegan.models,
        config.get("generator_type", "ParallelWaveGANGenerator"))
    model = model_class(**config["generator_params"])
    model.load_state_dict(
        torch.load(args.checkpoint, map_location="cpu")["model"]["generator"])
    logging.info(f"Loaded model parameters from {args.checkpoint}.")
    model.remove_weight_norm()
    model = model.eval().to(device)
    use_noise_input = not isinstance(model, parallel_wavegan.models.MelGANGenerator)
    pad_fn = torch.nn.ReplicationPad1d(
        config["generator_params"].get("aux_context_window", 0))

    # start generation
    total_rtf = 0.0
    with torch.no_grad(), tqdm(dataset, desc="[decode]") as pbar:
        for idx, (utt_id, c) in enumerate(pbar, 1):
            # setup input: c is a (num_frames, num_mels) numpy array
            x = ()
            if use_noise_input:
                z = torch.randn(1, 1, len(c) * config["hop_size"]).to(device)
                x += (z,)
            c = pad_fn(torch.from_numpy(c).unsqueeze(0).transpose(2, 1)).to(device)
            x += (c,)

            # generate
            start = time.time()
            y = model(*x).view(-1).cpu().numpy()
            rtf = (time.time() - start) / (len(y) / config["sampling_rate"])
            pbar.set_postfix({"RTF": rtf})
            total_rtf += rtf

            # save as PCM 16 bit wav file
            sf.write(os.path.join(config["outdir"], f"{utt_id}_gen.wav"),
                     y, config["sampling_rate"], "PCM_16")

    # report average RTF
    logging.info(
        f"Finished generation of {idx} utterances (RTF = {total_rtf / idx:.03f}).")
def main(): """Run preprocessing process.""" parser = argparse.ArgumentParser( description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)." ) parser.add_argument( "--rootdir", default=None, type=str, help="directory including feature files to be normalized. " "you need to specify either *-scp or rootdir.", ) parser.add_argument( "--wav-scp", default=None, type=str, help="kaldi-style wav.scp file. " "you need to specify either *-scp or rootdir.", ) parser.add_argument( "--feats-scp", default=None, type=str, help="kaldi-style feats.scp file. " "you need to specify either *-scp or rootdir.", ) parser.add_argument( "--segments", default=None, type=str, help="kaldi-style segments file.", ) parser.add_argument( "--dumpdir", type=str, required=True, help="directory to dump normalized feature files.", ) parser.add_argument( "--stats", type=str, required=True, help="statistics file.", ) parser.add_argument( "--skip-wav-copy", default=False, action="store_true", help="whether to skip the copy of wav files.", ) parser.add_argument( "--config", type=str, required=True, help="yaml format configuration file." ) parser.add_argument( "--verbose", type=int, default=1, help="logging level. higher is more logging. (default=1)", ) args = parser.parse_args() # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) else: logging.basicConfig( level=logging.WARN, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) logging.warning("Skip DEBUG/INFO messages") # load config with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check arguments if (args.feats_scp is not None and args.rootdir is not None) or ( args.feats_scp is None and args.rootdir is None ): raise ValueError("Please specify either --rootdir or --feats-scp.") # check directory existence if not os.path.exists(args.dumpdir): os.makedirs(args.dumpdir) # get dataset if args.rootdir is not None: if config["format"] == "hdf5": audio_query, mel_query = "*.h5", "*.h5" audio_load_fn = lambda x: read_hdf5(x, "wave") # NOQA mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": audio_query, mel_query = "*-wave.npy", "*-feats.npy" audio_load_fn = np.load mel_load_fn = np.load else: raise ValueError("support only hdf5 or npy format.") if not args.skip_wav_copy: dataset = AudioMelDataset( root_dir=args.rootdir, audio_query=audio_query, mel_query=mel_query, audio_load_fn=audio_load_fn, mel_load_fn=mel_load_fn, return_utt_id=True, ) else: dataset = MelDataset( root_dir=args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn, return_utt_id=True, ) else: if not args.skip_wav_copy: dataset = AudioMelSCPDataset( wav_scp=args.wav_scp, feats_scp=args.feats_scp, segments=args.segments, return_utt_id=True, ) else: dataset = MelSCPDataset( feats_scp=args.feats_scp, return_utt_id=True, ) logging.info(f"The number of files = {len(dataset)}.") # restore scaler scaler = StandardScaler() if config["format"] == "hdf5": scaler.mean_ = read_hdf5(args.stats, "mean") scaler.scale_ = read_hdf5(args.stats, "scale") elif config["format"] == "npy": scaler.mean_ = np.load(args.stats)[0] scaler.scale_ = np.load(args.stats)[1] else: raise ValueError("support only hdf5 or npy format.") # from version 0.23.0, this information is 
needed scaler.n_features_in_ = scaler.mean_.shape[0] # process each file for items in tqdm(dataset): if not args.skip_wav_copy: utt_id, audio, mel = items else: utt_id, mel = items # normalize mel = scaler.transform(mel) # save if config["format"] == "hdf5": write_hdf5( os.path.join(args.dumpdir, f"{utt_id}.h5"), "feats", mel.astype(np.float32), ) if not args.skip_wav_copy: write_hdf5( os.path.join(args.dumpdir, f"{utt_id}.h5"), "wave", audio.astype(np.float32), ) elif config["format"] == "npy": np.save( os.path.join(args.dumpdir, f"{utt_id}-feats.npy"), mel.astype(np.float32), allow_pickle=False, ) if not args.skip_wav_copy: np.save( os.path.join(args.dumpdir, f"{utt_id}-wave.npy"), audio.astype(np.float32), allow_pickle=False, ) else: raise ValueError("support only hdf5 or npy format.")
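
# Minimal sketch of what the scaler restoration above amounts to: transform()
# applies (x - mean) / scale per feature dimension. Shapes are illustrative:
import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.mean_ = np.zeros(80, dtype=np.float32)   # stand-in for read_hdf5(stats, "mean")
scaler.scale_ = np.ones(80, dtype=np.float32)   # stand-in for read_hdf5(stats, "scale")
scaler.n_features_in_ = scaler.mean_.shape[0]   # required from scikit-learn 0.23.0

mel = np.random.randn(100, 80).astype(np.float32)  # (frames, num_mels)
assert np.allclose(scaler.transform(mel), (mel - scaler.mean_) / scaler.scale_)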
def main(): """Run preprocessing process.""" parser = argparse.ArgumentParser( description="Compute mean and variance of dumped raw features " "(See detail in parallel_wavegan/bin/compute_statistics.py).") parser.add_argument( "--feats-scp", "--scp", default=None, type=str, help="kaldi-style feats.scp file. " "you need to specify either feats-scp or rootdir.", ) parser.add_argument( "--rootdir", type=str, help="directory including feature files. " "you need to specify either feats-scp or rootdir.", ) parser.add_argument( "--config", type=str, required=True, help="yaml format configuration file.", ) parser.add_argument( "--dumpdir", default=None, type=str, required=True, help="directory to save statistics. if not provided, " "stats will be saved in the above root directory. (default=None)", ) parser.add_argument( "--verbose", type=int, default=1, help="logging level. higher is more logging. (default=1)", ) args = parser.parse_args() # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) else: logging.basicConfig( level=logging.WARN, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) logging.warning("Skip DEBUG/INFO messages") # load config with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check arguments if (args.feats_scp is not None and args.rootdir is not None) or (args.feats_scp is None and args.rootdir is None): raise ValueError("Please specify either --rootdir or --feats-scp.") # check directory existence if not os.path.exists(args.dumpdir): os.makedirs(args.dumpdir) # get dataset if args.feats_scp is None: if config["format"] == "hdf5": mel_query = "*.h5" mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": mel_query = "*-feats.npy" mel_load_fn = np.load else: raise ValueError("support only hdf5 or npy format.") dataset = MelDataset(args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn) else: dataset = MelSCPDataset(args.feats_scp) logging.info(f"The number of files = {len(dataset)}.") # calculate statistics scaler = StandardScaler() for mel in tqdm(dataset): scaler.partial_fit(mel) if config["format"] == "hdf5": write_hdf5( os.path.join(args.dumpdir, "stats.h5"), "mean", scaler.mean_.astype(np.float32), ) write_hdf5( os.path.join(args.dumpdir, "stats.h5"), "scale", scaler.scale_.astype(np.float32), ) else: stats = np.stack([scaler.mean_, scaler.scale_], axis=0) np.save( os.path.join(args.dumpdir, "stats.npy"), stats.astype(np.float32), allow_pickle=False, )
def main(): """Run preprocessing process.""" parser = argparse.ArgumentParser( description="Compute mean and variance of dumped raw features " "(See detail in parallel_wavegan/bin/compute_statistics.py).") parser.add_argument("--feats-scp", "--scp", default=None, type=str, help="kaldi-style feats.scp file. " "you need to specify either feats-scp or rootdir.") parser.add_argument("--rootdir", type=str, required=True, help="directory including feature files. " "you need to specify either feats-scp or rootdir.") parser.add_argument("--config", type=str, required=True, help="yaml format configuration file.") parser.add_argument("--dumpdir", default=None, type=str, help="directory to save statistics. if not provided, " "stats will be saved in the above root directory. (default=None)") parser.add_argument("--ftype", default='mel', type=str, help="feature type") parser.add_argument("--verbose", type=int, default=1, help="logging level. higher is more logging. (default=1)") # runtime mode args = parser.parse_args() # interactive mode # args = argparse.ArgumentParser() # args.feats_scp = None # args.config = 'egs/so_emo_female/voc1/conf/multi_band_melgan.v2.yaml' # args.verbose = 1 # args.ftype = 'spec' # args.rootdir = '/data/evs/VCTK/VCTK-wgan/spec' # args.rootdir = '/data/evs/Arctic/spec' # args.dumpdir = os.path.join(args.rootdir, "") # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") else: logging.basicConfig( level=logging.WARN, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") logging.warning('Skip DEBUG/INFO messages') # load config with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check arguments if (args.feats_scp is not None and args.rootdir is not None) or \ (args.feats_scp is None and args.rootdir is None): raise ValueError("Please specify either --rootdir or --feats-scp.") # check directory existence if args.dumpdir is None: args.dumpdir = os.path.dirname(args.rootdir) if not os.path.exists(args.dumpdir): os.makedirs(args.dumpdir) # get dataset if args.feats_scp is None: if config["format"] == "hdf5": mel_query = "*.h5" mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": mel_query = "*.mel.npy" mel_load_fn = np.load spc_query = "*.spec.npy" spc_load_fn = np.load else: raise ValueError("support only hdf5 or npy format.") dataset1 = MelDataset( args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn) dataset2 = SpcDataset( args.rootdir, spc_query=spc_query, spc_load_fn=spc_load_fn) else: dataset = MelSCPDataset(args.feats_scp) logging.info(f"The number of files in mel dataset = {len(dataset1)}.") logging.info(f"The number of files in spc dataset = {len(dataset2)}.") # calculate statistics scaler = StandardScaler() if args.ftype == 'mel': for mel in tqdm(dataset1): scaler.partial_fit(mel) elif args.ftype == 'spec': for spc in tqdm(dataset2): scaler.partial_fit(spc) if config["format"] == "hdf5": write_hdf5(os.path.join(args.dumpdir, "{}_mean_std.h5".format(args.ftype)), "mean", scaler.mean_.astype(np.float32)) write_hdf5(os.path.join(args.dumpdir, "{}_mean_std.h5".format(args.ftype)), "scale", scaler.scale_.astype(np.float32)) else: stats = np.stack([scaler.mean_, scaler.scale_], axis=0) np.save(os.path.join(args.dumpdir, 
"{}_mean_std.npy".format(args.ftype)), stats.astype(np.float32), allow_pickle=False)
def main(): """Run preprocessing process.""" parser = argparse.ArgumentParser( description="Compute mean and variance of dumped raw features.") parser.add_argument("--rootdir", default=None, type=str, required=True, help="Direcotry including feature files.") parser.add_argument("--dumpdir", default=None, type=str, help="Direcotry to save statistics.") parser.add_argument("--config", default="hparam.yml", type=str, required=True, help="Yaml format configuration file.") parser.add_argument("--verbose", type=int, default=1, help="logging level (higher is more logging)") args = parser.parse_args() # set logger if args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") elif args.verbose > 0: logging.basicConfig( level=logging.INFO, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") else: logging.basicConfig( level=logging.WARN, format= "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s") logging.warning('skip DEBUG/INFO messages') # load config with open(args.config) as f: config = yaml.load(f, Loader=yaml.Loader) config.update(vars(args)) # check direcotry existence if args.dumpdir is None: args.dumpdir = os.path.dirname(args.rootdir) if not os.path.exists(args.dumpdir): os.makedirs(args.dumpdir) # get dataset if config["format"] == "hdf5": mel_query = "*.h5" mel_load_fn = lambda x: read_hdf5(x, "feats") # NOQA elif config["format"] == "npy": mel_query = "*-feats.npy" mel_load_fn = np.load else: raise ValueError("support only hdf5 or npy format.") dataset = MelDataset(args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn) logging.info(f"the number of files = {len(dataset)}.") # calculate statistics scaler = StandardScaler() for mel in tqdm(dataset): scaler.partial_fit(mel) if config["format"] == "hdf5": write_hdf5(os.path.join(args.dumpdir, "stats.h5"), "mean", scaler.mean_.astype(np.float32)) write_hdf5(os.path.join(args.dumpdir, "stats.h5"), "scale", scaler.scale_.astype(np.float32)) else: stats = np.stack([scaler.mean_, scaler.scale_], axis=0) np.save(os.path.join(args.dumpdir, "stats.npy"), stats.astype(np.float32), allow_pickle=False)