def setup_loader(ap, is_val=False, verbose=False):
    """Create the train or eval DataLoader.

    Relies on the module-level globals ``c`` (training config), ``train_data``,
    ``eval_data`` and ``num_gpus`` set by the training script.
    """
    loader = None
    if not is_val or c.run_eval:
        dataset = GANDataset(
            ap=ap,
            items=eval_data if is_val else train_data,
            seq_len=c.seq_len,
            hop_len=ap.hop_length,
            pad_short=c.pad_short,
            conv_pad=c.conv_pad,
            is_training=not is_val,
            return_segments=not is_val,
            use_noise_augment=c.use_noise_augment,
            use_cache=c.use_cache,
            verbose=verbose,
        )
        dataset.shuffle_mapping()
        sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
        loader = DataLoader(
            dataset,
            batch_size=1 if is_val else c.batch_size,
            shuffle=num_gpus == 0,
            drop_last=False,
            sampler=sampler,
            num_workers=c.num_val_loader_workers if is_val else c.num_loader_workers,
            pin_memory=False,
        )
    return loader
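# Usage sketch (illustrative, not part of the original module): the legacy
# helper above reads the module-level globals `c`, `train_data`, `eval_data`
# and `num_gpus`, so once the training script has populated them it is called
# with only the audio processor.
train_loader = setup_loader(ap, is_val=False, verbose=True)
eval_loader = setup_loader(ap, is_val=True)  # stays None unless c.run_eval is enabled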
def get_data_loader(  # pylint: disable=no-self-use
    self,
    config: Coqpit,
    ap: AudioProcessor,
    is_eval: bool,
    data_items: List,
    verbose: bool,
    num_gpus: int,
):
    """Initiate and return the GAN dataloader.

    Args:
        config (Coqpit): Model config.
        ap (AudioProcessor): Audio processor.
        is_eval (bool): Set the dataloader for evaluation if true.
        data_items (List): Data samples.
        verbose (bool): Log information if true.
        num_gpus (int): Number of GPUs in use.

    Returns:
        DataLoader: Torch dataloader.
    """
    dataset = GANDataset(
        ap=ap,
        items=data_items,
        seq_len=config.seq_len,
        hop_len=ap.hop_length,
        pad_short=config.pad_short,
        conv_pad=config.conv_pad,
        return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False,
        is_training=not is_eval,
        return_segments=not is_eval,
        use_noise_augment=config.use_noise_augment,
        use_cache=config.use_cache,
        verbose=verbose,
    )
    dataset.shuffle_mapping()
    sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None
    loader = DataLoader(
        dataset,
        batch_size=1 if is_eval else config.batch_size,
        shuffle=num_gpus == 0,
        drop_last=False,
        sampler=sampler,
        num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
        pin_memory=False,
    )
    return loader
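# Usage sketch (assumptions: the Coqui TTS package layout, the `HifiganConfig`
# config class, and the `GAN(config)` constructor call; the data path is
# hypothetical). It shows how the method above would be driven for training.
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import HifiganConfig
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.models.gan import GAN

config = HifiganConfig()
ap = AudioProcessor(**config.audio)
_, train_items = load_wav_data("/path/to/wavs", 10)  # returns (eval_items, train_items)

model = GAN(config)
train_loader = model.get_data_loader(
    config=config,
    ap=ap,
    is_eval=False,
    data_items=train_items,
    verbose=True,
    num_gpus=0,
)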
def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List, verbose: bool) -> Dataset:
    """Create and return the vocoder dataset that matches ``config.model``."""
    if config.model.lower() == "gan":
        dataset = GANDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad_short=config.pad_short,
            conv_pad=config.conv_pad,
            return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False,
            is_training=not is_eval,
            return_segments=not is_eval,
            use_noise_augment=config.use_noise_augment,
            use_cache=config.use_cache,
            verbose=verbose,
        )
        dataset.shuffle_mapping()
    elif config.model.lower() == "wavegrad":
        dataset = WaveGradDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad_short=config.pad_short,
            conv_pad=config.conv_pad,
            is_training=not is_eval,
            return_segments=True,
            use_noise_augment=False,
            use_cache=config.use_cache,
            verbose=verbose,
        )
    elif config.model.lower() == "wavernn":
        dataset = WaveRNNDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad=config.model_params.pad,
            mode=config.model_params.mode,
            mulaw=config.model_params.mulaw,
            is_training=not is_eval,
            verbose=verbose,
        )
    else:
        raise ValueError(f" [!] Dataset for model {config.model.lower()} cannot be found.")
    return dataset
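# Usage sketch (assumptions: `WavegradConfig` as the config class and an
# illustrative data path). The factory picks the dataset class from
# `config.model`, and the result is wrapped in a standard DataLoader.
from torch.utils.data import DataLoader
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs import WavegradConfig
from TTS.vocoder.datasets.preprocess import load_wav_data

config = WavegradConfig()
ap = AudioProcessor(**config.audio)
_, train_items = load_wav_data("/path/to/wavs", 10)

train_set = setup_dataset(config, ap, is_eval=False, data_items=train_items, verbose=True)
train_loader = DataLoader(train_set, batch_size=config.batch_size, shuffle=True, drop_last=True)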
def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, use_noise_augment, use_cache, num_workers):
    """Run the dataloader with the given parameters and check the output conditions."""
    ap = AudioProcessor(**C.audio)
    _, train_items = load_wav_data(test_data_path, 10)
    dataset = GANDataset(
        ap,
        train_items,
        seq_len=seq_len,
        hop_len=hop_len,
        pad_short=2000,
        conv_pad=conv_pad,
        return_segments=return_segments,
        use_noise_augment=use_noise_augment,
        use_cache=use_cache,
    )
    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    max_iter = 10
    count_iter = 0

    # return random segments or return the whole audio
    if return_segments:
        for item1, _ in loader:
            feat1, wav1 = item1
            # feat2, wav2 = item2
            expected_feat_shape = (batch_size, ap.num_mels, seq_len // hop_len + conv_pad * 2)

            # check shapes
            assert np.all(feat1.shape == expected_feat_shape), f" [!] {feat1.shape} vs {expected_feat_shape}"
            assert (feat1.shape[2] - conv_pad * 2) * hop_len == wav1.shape[2]

            # check feature vs audio match
            if not use_noise_augment:
                for idx in range(batch_size):
                    audio = wav1[idx].squeeze()
                    feat = feat1[idx]
                    mel = ap.melspectrogram(audio)
                    # the first 2 frames and the last frame are skipped due to the
                    # padding applied in the spectrogram computation.
                    assert (feat - mel[:, : feat1.shape[-1]])[:, 2:-1].sum() == 0, f" [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum()}"
            count_iter += 1
            # if count_iter == max_iter:
            #     break
    else:
        for item in loader:
            feat, wav = item
            expected_feat_shape = (batch_size, ap.num_mels, (wav.shape[-1] // hop_len) + (conv_pad * 2))
            assert np.all(feat.shape == expected_feat_shape), f" [!] {feat.shape} vs {expected_feat_shape}"
            assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]
            count_iter += 1
            if count_iter == max_iter:
                break
def gan_dataset_case(
    batch_size, seq_len, hop_len, conv_pad, return_pairs, return_segments, use_noise_augment, use_cache, num_workers
):
    """Run the dataloader with the given parameters and check the output conditions."""
    ap = AudioProcessor(**C.audio)
    _, train_items = load_wav_data(test_data_path, 10)
    dataset = GANDataset(
        ap,
        train_items,
        seq_len=seq_len,
        hop_len=hop_len,
        pad_short=2000,
        conv_pad=conv_pad,
        return_pairs=return_pairs,
        return_segments=return_segments,
        use_noise_augment=use_noise_augment,
        use_cache=use_cache,
    )
    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    max_iter = 10
    count_iter = 0

    def check_item(feat, wav):
        """Check one batch of (features, waveform) pairs."""
        feat = feat.numpy()
        wav = wav.numpy()
        expected_feat_shape = (batch_size, ap.num_mels, seq_len // hop_len + conv_pad * 2)

        # check shapes
        assert np.all(feat.shape == expected_feat_shape), f" [!] {feat.shape} vs {expected_feat_shape}"
        assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]

        # check feature vs audio match
        if not use_noise_augment:
            for idx in range(batch_size):
                audio = wav[idx].squeeze()
                feat_item = feat[idx]  # do not rebind `feat`; it is indexed again on the next iteration
                mel = ap.melspectrogram(audio)
                # the first 2 and the last 2 frames are skipped due to padding
                # differences in the STFT
                max_diff = abs((feat_item - mel[:, : feat_item.shape[-1]])[:, 2:-2]).max()
                assert max_diff <= 1e-6, f" [!] {max_diff}"

    # return random segments or return the whole audio
    if return_segments:
        if return_pairs:
            for item1, item2 in loader:
                feat1, wav1 = item1
                feat2, wav2 = item2
                check_item(feat1, wav1)
                check_item(feat2, wav2)
                count_iter += 1
        else:
            for item1 in loader:
                feat1, wav1 = item1
                check_item(feat1, wav1)
                count_iter += 1
    else:
        for item in loader:
            feat, wav = item
            expected_feat_shape = (batch_size, ap.num_mels, (wav.shape[-1] // hop_len) + (conv_pad * 2))
            assert np.all(feat.shape == expected_feat_shape), f" [!] {feat.shape} vs {expected_feat_shape}"
            assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]
            count_iter += 1
            if count_iter == max_iter:
                break
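# Driver sketch (parameter values are illustrative): the checker above is
# meant to be run over a grid of parameter combinations, one call per row.
# Column order follows the signature of `gan_dataset_case`.
def test_parametrized_gan_dataset():
    """Run `gan_dataset_case` over a small, illustrative parameter grid."""
    params = [
        # batch_size, seq_len, hop_len, conv_pad, return_pairs, return_segments, use_noise_augment, use_cache, num_workers
        [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, True, False, False, 0],
        [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 2, False, True, True, False, 0],
        [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, False, False, False, False, 0],
    ]
    for param in params:
        gan_dataset_case(*param)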