def test_decode(self):
    for bit in range(1, 16 + 1):
        with self.subTest(bit=bit):
            coarse = np.arange(2**bit).astype(np.int32)
            w = decode_single(coarse, bit=bit)
            np.testing.assert_equal(
                w, np.linspace(-1, 1, num=2**bit).astype(np.float32))
def convert_to_dict(self, wave: np.ndarray, silence: np.ndarray, local: np.ndarray):
    # apply mu-law companding before quantization
    if self.mulaw:
        wave = encode_mulaw(wave, mu=2**self.bit)

    if self.to_double:
        # split each 16-bit sample into coarse/fine 8-bit codes and also keep
        # the decoded float versions
        assert self.bit == 16
        encoded_coarse, encoded_fine = encode_16bit(wave)
        coarse = decode_single(encoded_coarse).astype(np.float32)
        fine = decode_single(encoded_fine).astype(np.float32)[:-1]  # drop the last fine value
    else:
        encoded_coarse = encode_single(wave, bit=self.bit)
        encoded_fine = None
        coarse = wave.ravel().astype(np.float32)
        fine = None

    return dict(
        coarse=coarse,
        fine=fine,
        encoded_coarse=encoded_coarse,
        encoded_fine=encoded_fine,
        local=local,
        silence=silence[1:],  # drop the first silence flag
    )
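# Hedged worked example (not part of the repository): convert_to_dict above
# relies on encode_16bit splitting each 16-bit sample code into an 8-bit
# "coarse" and an 8-bit "fine" code (a WaveRNN-style dual-softmax split is
# assumed here); decode_single then maps either code back onto [-1, 1].
sample = np.array([0.25], dtype=np.float32)
coarse_code, fine_code = encode_16bit(sample)
print(coarse_code, fine_code)                                 # 8-bit integer codes
print(decode_single(coarse_code), decode_single(fine_code))   # floats in [-1, 1]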
def main_forward(
    self,
    length: int,
    sampling_policy: SamplingPolicy,
    num_generate: int,
    local_array: np.ndarray = None,
    s_one: np.ndarray = None,
):
    # encode the conditioning features (and speaker embedding) once up front
    if self.model.with_local:
        with chainer.using_config("train", False), chainer.using_config(
                "enable_backprop", False):
            local_array = self.model.forward_encode(
                l_array=local_array, s_one=s_one).data

    # start every sequence from silence, encoded to its integer code
    c = self.xp.zeros([num_generate], dtype=np.float32)
    c = encode_single(c, bit=self.single_bit)
    hidden_coarse = self.model.gru.init_hx(local_array)[0].data

    if self.use_cpp_inference and sampling_policy == SamplingPolicy.random:
        # fast path: the batched C++ inference kernel writes directly into `wave`
        import yukarin_autoreg_cpp

        wave = np.zeros((length, num_generate), dtype=np.int32)
        yukarin_autoreg_cpp.inference(
            batch_size=num_generate,
            length=length,
            output=wave,
            x=to_numpy(c),
            l_array=to_numpy(self.xp.transpose(local_array, (1, 0, 2))),
            hidden=to_numpy(hidden_coarse),
        )
    else:
        if sampling_policy == SamplingPolicy.random:
            # faster Python path for random sampling
            fast_forward_params = get_fast_forward_params(self.model)
            w_list = fast_generate(
                length=length,
                x=c,
                l_array=local_array,
                h=hidden_coarse,
                **fast_forward_params,
            )
        else:
            # sample-by-sample generation loop
            w_list = []
            hc = hidden_coarse
            for i in tqdm(range(length), desc="generate"):
                with chainer.using_config("train", False), chainer.using_config(
                        "enable_backprop", False):
                    c, hc = self.model.forward_one(
                        prev_x=c,
                        prev_l=local_array[:, i],
                        hidden=hc,
                    )

                if sampling_policy == SamplingPolicy.random:
                    is_random = True
                elif sampling_policy == SamplingPolicy.maximum:
                    is_random = False
                else:
                    raise ValueError(sampling_policy)

                c = self.model.sampling(c, maximum=not is_random)
                w_list.append(c)

        wave = self.xp.stack(w_list)
        wave = cuda.to_cpu(wave)

    # (length, batch) -> (batch, length), then back to a float waveform
    wave = wave.T
    wave = decode_single(wave, bit=self.single_bit)
    if self.mulaw:
        wave = decode_mulaw(wave, mu=2**self.single_bit)

    return [
        Wave(wave=w_one, sampling_rate=self.sampling_rate) for w_one in wave
    ]
def test_decode_one_value(self):
    self.assertEqual(decode_single(0), -1)
    self.assertEqual(decode_single(255), 1)
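# Hedged worked example (not part of the original tests): test_decode above pins
# decode_single to np.linspace(-1, 1, num=2**bit), i.e. code k decodes to
# -1 + k * 2 / (2**bit - 1).  For the default 8-bit case checked in
# test_decode_one_value, the midpoint code 128 therefore decodes to about 0.0039:
np.testing.assert_allclose(
    decode_single(np.int32(128)), -1.0 + 128 * 2 / 255, atol=1e-6)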
def main():
    model_dir: Path = arguments.model_dir
    model_iteration: int = arguments.model_iteration
    model_config: Path = arguments.model_config
    time_length: float = arguments.time_length
    gpu: int = arguments.gpu

    config = create_config(model_config)
    model_path = _get_predictor_model_path(model_dir, model_iteration)

    sr = config.dataset.sampling_rate

    # load the trained predictor
    model = create_predictor(config.model)
    chainer.serializers.load_npz(str(model_path), model)
    if gpu is not None:
        model.to_gpu(gpu)
        cuda.get_device_from_id(gpu).use()

    chainer.global_config.train = False
    chainer.global_config.enable_backprop = False

    wave_paths = sorted([Path(p) for p in glob.glob(str(config.dataset.input_wave_glob))])
    local_paths = sorted([Path(p) for p in glob.glob(str(config.dataset.input_local_glob))])
    assert len(wave_paths) == len(local_paths)

    # shuffle both lists with the same seed so wave/local files stay paired,
    # then take the first pair
    np.random.RandomState(config.dataset.seed).shuffle(wave_paths)
    np.random.RandomState(config.dataset.seed).shuffle(local_paths)
    wave_path = wave_paths[0]
    local_path = local_paths[0]

    w_data = Wave.load(wave_path, sampling_rate=sr)
    l_data = SamplingData.load(local_path)

    # trim to a whole number of local-feature frames
    length = int(sr * time_length)
    l_scale = int(sr // l_data.rate)
    l_sl = length // l_scale
    length = l_sl * l_scale
    w = w_data.wave[:length]
    l = l_data.array[:l_sl]

    # teacher-forced forward pass over the ground-truth waveform
    coarse, fine = encode_16bit(w)
    c, f, hc, hf = model(
        c_array=decode_single(model.xp.asarray(coarse)).astype(np.float32)[np.newaxis],
        f_array=decode_single(model.xp.asarray(fine)).astype(np.float32)[:-1][np.newaxis],
        l_array=model.xp.asarray(l)[np.newaxis],
    )
    c = chainer.functions.softmax(c)
    c = chainer.cuda.to_cpu(c[0].data)
    f = chainer.cuda.to_cpu(f[0].data)

    # plot the predicted coarse distribution against the true waveform
    fig = plt.figure(figsize=[32 * time_length, 10])
    plt.imshow(c, aspect='auto', interpolation='nearest')
    plt.colorbar()
    plt.plot((w + 1) * 127.5, 'g', linewidth=0.1, label='true')
    plt.plot(np.argmax(c, axis=0) + np.argmax(f, axis=0) / 256, 'r', linewidth=0.1, label='predicted')
    plt.legend()
    fig.savefig('output.eps')
def generate(
    self,
    time_length: Optional[float],
    sampling_policy: SamplingPolicy,
    num_generate: int,
    coarse=None,
    local_array: np.ndarray = None,
    speaker_nums: List[int] = None,
    hidden_coarse=None,
):
    assert num_generate <= self.max_batch_size
    assert coarse is None or len(coarse) == num_generate
    assert local_array is None or len(local_array) == num_generate
    assert speaker_nums is None or len(speaker_nums) == num_generate
    assert hidden_coarse is None or len(hidden_coarse) == num_generate
    # only random sampling is supported by the C++ inference kernel used below
    assert sampling_policy == SamplingPolicy.random

    length = int(self.sampling_rate * time_length)

    if local_array is None:
        local_array = self.xp.empty((num_generate, length, 0), dtype=np.float32)
    else:
        local_array = self.xp.asarray(local_array)

    # speaker embedding (optional)
    if speaker_nums is not None:
        speaker_nums = self.xp.asarray(speaker_nums).reshape((-1,))
        with chainer.using_config("train", False), chainer.using_config(
                "enable_backprop", False):
            s_one = self.model.forward_speaker(speaker_nums).data
    else:
        s_one = None

    # encode the conditioning features once for the whole utterance
    if self.model.with_local:
        with chainer.using_config("train", False), chainer.using_config(
                "enable_backprop", False):
            local_array = self.model.forward_encode(
                l_array=local_array, s_one=s_one).data

    # start from silence unless an initial coarse sample is given
    if coarse is None:
        c = self.xp.zeros([num_generate], dtype=np.float32)
        c = encode_single(c, bit=self.single_bit)
    else:
        c = coarse

    if hidden_coarse is None:
        hidden_coarse = self.model.gru.init_hx(local_array)[0].data

    # run the batched C++ autoregressive kernel; it writes integer codes into `wave`
    wave = np.zeros((length, num_generate), dtype=np.int32)
    yukarin_autoreg_cpp.inference(
        batch_size=num_generate,
        length=length,
        output=wave,
        x=to_numpy(c),
        l_array=to_numpy(self.xp.transpose(local_array, (1, 0, 2))),
        hidden=to_numpy(hidden_coarse),
    )

    # (length, batch) -> (batch, length), then back to a float waveform
    wave = wave.T
    wave = decode_single(wave, bit=self.single_bit)
    if self.mulaw:
        wave = decode_mulaw(wave, mu=2**self.single_bit)

    return [
        Wave(wave=w_one, sampling_rate=self.sampling_rate) for w_one in wave
    ]
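# Hedged usage sketch (not part of the repository): one way a caller might drive
# generate() above.  The `generator` instance, the local-feature file name, and
# the use of the soundfile package for writing are all assumptions.
import numpy as np
import soundfile

local_features = np.load("local_features.npy")  # hypothetical (batch, frames, dim) array
waves = generator.generate(
    time_length=1.0,
    sampling_policy=SamplingPolicy.random,  # generate() asserts random sampling
    num_generate=len(local_features),
    local_array=local_features,
)
for i, w in enumerate(waves):
    soundfile.write(f"generated_{i}.wav", w.wave, w.sampling_rate)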