def generate(self):
    """Load this item's data and bundle it into an ``Input``.

    The wave is loaded from ``self.path_wave`` and the silence data from
    ``self.path_silence``. The local features are loaded from
    ``self.path_local``; if that load fails for any reason they are computed
    from the wave with ``to_log_melspectrogram`` at a rate of 80, written to
    a fresh temporary ``.npy`` file, and ``self.path_local`` is repointed at
    that file so that a later call can load it instead of recomputing.

    Returns:
        An ``Input`` holding the wave, the silence data and the local data.
    """
    wave = Wave.load(self.path_wave)

    try:
        local = SamplingData.load(self.path_local)
    except Exception:
        # Deliberate best-effort fallback: any load failure triggers a
        # recompute. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit from being swallowed here.
        local_rate = 80
        local_array = to_log_melspectrogram(wave=wave, rate=local_rate)
        local = SamplingData(array=local_array, rate=local_rate)

        # delete=False: the file has to outlive the context manager because
        # it serves as the cache referenced by ``self.path_local``.
        with NamedTemporaryFile(suffix=".npy", delete=False) as f:
            self.path_local = Path(f.name)
            local.save(self.path_local)

    return Input(
        wave=wave,
        silence=SamplingData.load(self.path_silence),
        local=local,
    )
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    to_voiced_scaler: bool,
    to_f0_scaler: bool,
    to_phoneme_onehot: bool,
    batch_size: Optional[int],
    num_test: int,
    target_glob: Optional[str],
    use_gpu: bool,
):
    """Run the trained networks over test waves and save the features.

    For every selected wave file the generator output is stored as a
    ``SamplingData`` ``.npy`` file in ``output_dir``, named after the stem of
    the wave path. The call arguments are also recorded in
    ``output_dir / "arguments.yaml"``.

    Args:
        model_dir: Directory searched for the model snapshots.
        model_iteration: Iteration forwarded to ``_get_model_path``.
        model_config: Config YAML path; defaults to
            ``model_dir / "config.yaml"`` when ``None``.
        output_dir: Destination directory (created if missing; its parent
            must already exist).
        to_voiced_scaler: Load ``voiced_network_`` and forward the flag to
            the generator.
        to_f0_scaler: Load ``f0_network_`` and forward the flag to the
            generator.
        to_phoneme_onehot: Load ``phoneme_network_`` and forward the flag to
            the generator.
        batch_size: Number of waves per generator call; defaults to
            ``config.train.batch_size`` when ``None``.
        num_test: How many leading inputs of the test dataset to process.
        target_glob: Optional glob pattern; matching paths are processed in
            addition to the dataset inputs.
        use_gpu: Forwarded to ``Generator``.

    Raises:
        TypeError: If the test dataset is neither a ``SpeakerWavesDataset``
            nor a ``WavesDataset``.
    """
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    # NOTE: locals() is snapshotted here on purpose, before any further local
    # is bound, so that only the call arguments end up in the file.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    # Context manager so the config file handle is closed deterministically.
    with model_config.open() as f:
        config = Config.from_dict(yaml.safe_load(f))

    generator = Generator(
        config=config,
        predictor=_get_model_path(
            model_dir=model_dir,
            iteration=model_iteration,
            prefix="predictor_",
        ),
        voiced_network=(
            _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="voiced_network_",
            )
            if to_voiced_scaler
            else None
        ),
        f0_network=(
            _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="f0_network_",
            )
            if to_f0_scaler
            else None
        ),
        phoneme_network=(
            _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="phoneme_network_",
            )
            if to_phoneme_onehot
            else None
        ),
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["test"]
    scale = numpy.prod(config.network.scale_list)

    if batch_size is None:
        batch_size = config.train.batch_size

    if isinstance(dataset, SpeakerWavesDataset):
        wave_paths = [data.path_wave for data in dataset.wave_dataset.inputs[:num_test]]
    elif isinstance(dataset, WavesDataset):
        wave_paths = [data.path_wave for data in dataset.inputs[:num_test]]
    else:
        raise TypeError(f"unsupported test dataset type: {type(dataset).__name__}")

    if target_glob is not None:
        wave_paths += list(map(Path, glob(target_glob)))

    for wps in tqdm(chunked(wave_paths, batch_size), desc="generate"):
        waves = [Wave.load(p) for p in wps]
        arrays = [w.wave for w in waves]

        # Each wave's own length rounded up to a multiple of `scale`; the
        # batch as a whole is zero-padded to the longest of these.
        pad_lengths = [int(numpy.ceil(len(w) / scale) * scale) for w in arrays]
        batch_length = max(pad_lengths)
        arrays = [numpy.r_[w, numpy.zeros(batch_length - len(w))] for w in arrays]

        tensors = [torch.from_numpy(array.astype(numpy.float32)) for array in arrays]
        output = generator.generate(
            wave=concat_examples(tensors),
            to_voiced_scaler=to_voiced_scaler,
            to_f0_scaler=to_f0_scaler,
            to_phoneme_onehot=to_phoneme_onehot,
        )

        for feature, p, w, pad_length in zip(output, wps, waves, pad_lengths):
            # Keep only the leading `pad_length // scale` rows of the
            # transposed output (presumably one row per `scale` samples, so
            # rows caused by batch padding are dropped — TODO confirm).
            feature = feature.T[: pad_length // scale]
            data = SamplingData(array=feature, rate=w.sampling_rate // scale)
            data.save(output_dir / (p.stem + ".npy"))