def __init__(self,
             root_dir,
             audio_query="*.h5",
             mel_query="*.h5",
             audio_load_fn=lambda x: read_hdf5(x, "wave"),
             mel_load_fn=lambda x: read_hdf5(x, "feats"),
             audio_length_threshold=None,
             mel_length_threshold=None,
             return_filename=False,
             ):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        audio_load_fn (func): Function to load audio file.
        mel_load_fn (func): Function to load feature file.
        audio_length_threshold (int): Threshold to remove short audio files.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_filename (bool): Whether to return the filename with arrays.

    """
    # find all of audio and mel files
    audio_files = sorted(find_files(root_dir, audio_query))
    mel_files = sorted(find_files(root_dir, mel_query))

    # filter by threshold
    if audio_length_threshold is not None:
        audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]
        idxs = [idx for idx in range(len(audio_files))
                if audio_lengths[idx] > audio_length_threshold]
        if len(audio_files) != len(idxs):
            # warning level + capitalized message for consistency with the
            # other dataset classes in this file
            logging.warning(f"Some files are filtered by audio length threshold "
                            f"({len(audio_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [idx for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold]
        if len(mel_files) != len(idxs):
            logging.warning(f"Some files are filtered by mel length threshold "
                            f"({len(mel_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]

    # assert the number of files
    # NOTE: the original message contained a stray "$" before the f-string
    # placeholder, which printed literally; removed.
    assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
    assert len(audio_files) == len(mel_files), \
        f"Number of audio and mel files are different ({len(audio_files)} vs {len(mel_files)})."

    self.audio_files = audio_files
    self.mel_files = mel_files
    self.audio_load_fn = audio_load_fn
    self.mel_load_fn = mel_load_fn
    self.return_filename = return_filename
def __init__(
        self,
        root_dir,
        mel_query="*-feats.npy",
        mel_length_threshold=None,
        mel_load_fn=np.load,
        return_utt_id=False,
        allow_cache=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        mel_query (str): Query to find feature files in root_dir.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
        allow_cache (bool): Whether to allow cache of the loaded files.

    """
    # find all of the mel files
    mel_files = sorted(find_files(root_dir, mel_query))

    # filter by threshold
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [
            idx for idx in range(len(mel_files))
            if mel_lengths[idx] > mel_length_threshold
        ]
        if len(mel_files) != len(idxs):
            logging.warning(
                f"Some files are filtered by mel length threshold "
                f"({len(mel_files)} -> {len(idxs)}).")
        mel_files = [mel_files[idx] for idx in idxs]

    # assert the number of files
    # NOTE: removed the stray "$" that printed literally in the message
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."

    self.mel_files = mel_files
    self.mel_load_fn = mel_load_fn
    # Derive utterance ids from the filenames.
    # NOTE: the original assigned utt_ids from splitext first and then
    # unconditionally overwrote it in the if/else below; the dead first
    # assignment has been removed.
    if ".npy" in mel_query:
        # numpy dumps follow the "<utt_id>-feats.npy" naming convention
        self.utt_ids = [
            os.path.basename(f).replace("-feats.npy", "") for f in mel_files
        ]
    else:
        self.utt_ids = [
            os.path.splitext(os.path.basename(f))[0] for f in mel_files
        ]
    self.return_utt_id = return_utt_id
    self.allow_cache = allow_cache

    if allow_cache:
        # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
        self.manager = Manager()
        self.caches = self.manager.list()
        self.caches += [() for _ in range(len(mel_files))]
def __init__(self,
             root_dir,
             mel_query="*-feats.npy",
             mel_length_threshold=None,
             mel_load_fn=np.load,
             return_filename=False,
             ):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        mel_query (str): Query to find feature files in root_dir.
        mel_load_fn (func): Function to load feature file.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_filename (bool): Whether to return the filename with arrays.

    """
    # find all of the mel files
    mel_files = sorted(find_files(root_dir, mel_query))

    # filter by threshold
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [idx for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold]
        if len(mel_files) != len(idxs):
            # warning level + capitalized message for consistency with the
            # other dataset classes in this file
            logging.warning(f"Some files are filtered by mel length threshold "
                            f"({len(mel_files)} -> {len(idxs)}).")
        mel_files = [mel_files[idx] for idx in idxs]

    # assert the number of files
    # NOTE: removed the stray "$" that printed literally in the message
    assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."

    self.mel_files = mel_files
    self.mel_load_fn = mel_load_fn
    self.return_filename = return_filename
def main():
    """Convert dumped filter banks to waveforms with Griffin-Lim.

    Reads a YAML configuration, globs ``*.h5`` feature files under
    ``--rootdir``, and writes one ``.wav`` per feature file into
    ``--outdir``, reconstructing phase with the Griffin-Lim algorithm.
    """
    parser = argparse.ArgumentParser(
        description=
        'Convert filter banks to waveform using Griffin-Lim algorithm',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--conf',
                        type=str,
                        required=True,
                        # fixed typo: "Cofiguration" -> "Configuration"
                        help='Configuration file')
    parser.add_argument('--rootdir',
                        type=str,
                        required=True,
                        help='Root directory of filter bank h5 files')
    parser.add_argument('--outdir',
                        type=str,
                        required=True,
                        help='Output directory')
    # generalized: the worker count was hard-coded to 30; expose it with the
    # same default so existing invocations behave identically
    parser.add_argument('--n_jobs',
                        type=int,
                        default=30,
                        help='Number of parallel jobs')
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    for k, v in conf.items():
        logging.info("{}: {}".format(k, v))

    # find h5 files and map each output wav path to its loaded features
    feats_files = find_files(args.rootdir, "*.h5")
    feats = {
        os.path.join(args.outdir,
                     os.path.basename(filename).replace(".h5", ".wav")):
        read_feature(filename, "feats")
        for filename in feats_files
    }

    # Main Griffin-Lim algorithm
    Parallel(n_jobs=args.n_jobs)([
        delayed(mlfb2wavf)(
            feats[wavf],
            wavf,
            fs=conf["feature"]["fs"],
            n_mels=conf["feature"]["mlfb_dim"],
            fftl=conf["feature"]["fftl"],
            hop_size=conf["feature"]["hop_size"],
            plot=False,
        ) for wavf in list(feats.keys())
    ])
def __init__(
        self,
        root_dir,
        audio_query="*-wave.npy",
        audio_length_threshold=None,
        audio_load_fn=np.load,
        return_filename=False,
        allow_cache=False,
):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        audio_load_fn (func): Function to load audio file.
        audio_length_threshold (int): Threshold to remove short audio files.
        return_filename (bool): Whether to return the filename with arrays.
        allow_cache (bool): Whether to allow cache of the loaded files.

    """
    # find all of audio files
    audio_files = sorted(find_files(root_dir, audio_query))

    # filter by threshold
    if audio_length_threshold is not None:
        audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]
        idxs = [
            idx for idx in range(len(audio_files))
            if audio_lengths[idx] > audio_length_threshold
        ]
        if len(audio_files) != len(idxs):
            # BUG FIX: was `logging.waning`, which raises AttributeError
            # whenever any file is filtered out
            logging.warning(
                f"Some files are filtered by audio length threshold "
                f"({len(audio_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]

    # assert the number of files
    # NOTE: removed the stray "$" that printed literally in the message
    assert len(
        audio_files) != 0, f"Not found any audio files in {root_dir}."

    self.audio_files = audio_files
    self.audio_load_fn = audio_load_fn
    self.return_filename = return_filename
    self.allow_cache = allow_cache

    if allow_cache:
        # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
        self.manager = Manager()
        self.caches = self.manager.list()
        self.caches += [() for _ in range(len(audio_files))]
def __init__(self,
             root_dir,
             audio_query="*.h5",
             mel_query="*.h5",
             audio_load_fn=lambda x: read_hdf5(x, "wave"),
             mel_load_fn=lambda x: read_hdf5(x, "feats"),
             audio_length_threshold=None,
             mel_length_threshold=None,
             return_utt_id=False,
             return_spk_id=False,
             allow_cache=False,
             ):
    """Initialize dataset.

    Args:
        root_dir (str): Root directory including dumped files.
        audio_query (str): Query to find audio files in root_dir.
        mel_query (str): Query to find feature files in root_dir.
        audio_load_fn (func): Function to load audio file.
        mel_load_fn (func): Function to load feature file.
        audio_length_threshold (int): Threshold to remove short audio files.
        mel_length_threshold (int): Threshold to remove short feature files.
        return_utt_id (bool): Whether to return the utterance id with arrays.
        return_spk_id (bool): Whether to return the speaker id with arrays.
        allow_cache (bool): Whether to allow cache of the loaded files.

    """
    # find all of audio and mel files
    audio_files = sorted(find_files(root_dir, audio_query))
    mel_files = sorted(find_files(root_dir, mel_query))

    # filter by threshold
    if audio_length_threshold is not None:
        audio_lengths = [audio_load_fn(f).shape[0] for f in audio_files]
        idxs = [idx for idx in range(len(audio_files))
                if audio_lengths[idx] > audio_length_threshold]
        if len(audio_files) != len(idxs):
            logging.warning(f"Some files are filtered by audio length threshold "
                            f"({len(audio_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]
    if mel_length_threshold is not None:
        mel_lengths = [mel_load_fn(f).shape[0] for f in mel_files]
        idxs = [idx for idx in range(len(mel_files))
                if mel_lengths[idx] > mel_length_threshold]
        if len(mel_files) != len(idxs):
            logging.warning(f"Some files are filtered by mel length threshold "
                            f"({len(mel_files)} -> {len(idxs)}).")
        audio_files = [audio_files[idx] for idx in idxs]
        mel_files = [mel_files[idx] for idx in idxs]

    # assert the number of files
    # NOTE: removed the stray "$" that printed literally in the message
    assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
    assert len(audio_files) == len(mel_files), \
        f"Number of audio and mel files are different ({len(audio_files)} vs {len(mel_files)})."

    self.audio_files = audio_files
    self.audio_load_fn = audio_load_fn
    self.mel_load_fn = mel_load_fn
    self.mel_files = mel_files

    # derive utterance ids from the filenames
    if ".npy" in audio_query:
        self.utt_ids = [os.path.basename(f).replace("-wave.npy", "")
                        for f in audio_files]
    else:
        self.utt_ids = [os.path.splitext(os.path.basename(f))[0]
                        for f in audio_files]
    self.return_utt_id = return_utt_id

    # build a sorted speaker-id vocabulary and fit an integer label encoder
    # (only meaningful for vctk; removed commented-out LabelBinarizer variant)
    self.spk_ids = sorted(set(self.get_spk_id(s) for s in self.utt_ids))
    self.spk_id_encoder = LabelEncoder().fit(self.spk_ids)
    self.return_spk_id = return_spk_id

    self.allow_cache = allow_cache
    if allow_cache:
        # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
        self.manager = Manager()
        self.caches = self.manager.list()
        self.caches += [() for _ in range(len(audio_files))]