Example #1
0
 def test_decode(self):
     """Check ``decode_single`` over every code for bit depths 1 to 16.

     For each bit depth, all ``2**bit`` integer codes are decoded and the
     result must equal ``2**bit`` evenly spaced float32 values from -1 to 1.
     """
     for bit in range(1, 17):
         with self.subTest(bit=bit):
             num_levels = 2**bit
             codes = np.arange(num_levels).astype(np.int32)
             decoded = decode_single(codes, bit=bit)
             expected = np.linspace(-1, 1, num=num_levels).astype(np.float32)
             np.testing.assert_equal(decoded, expected)
Example #2
0
 def convert_to_dict(self, wave: np.ndarray, silence: np.ndarray,
                     local: np.ndarray):
     """Bundle a waveform and its side information into one dict.

     When ``self.mulaw`` is set the wave is first passed through
     ``encode_mulaw`` with ``mu=2**self.bit``.

     * ``self.to_double`` set (requires ``self.bit == 16``): the wave is
       split by ``encode_16bit`` into coarse/fine codes, and the float
       ``coarse``/``fine`` entries are those codes decoded again with
       ``decode_single``; ``fine`` drops its last element.
     * otherwise: only coarse codes are produced (``encode_single``), the
       float ``coarse`` entry is the flattened wave itself, and both fine
       entries are ``None``.

     Returns:
         dict with keys ``coarse``, ``fine``, ``encoded_coarse``,
         ``encoded_fine``, ``local`` and ``silence`` (``silence`` without
         its first element).
     """
     if self.mulaw:
         wave = encode_mulaw(wave, mu=2**self.bit)

     if not self.to_double:
         # Single-resolution mode: no fine component at all.
         encoded_coarse, encoded_fine = encode_single(wave, bit=self.bit), None
         coarse, fine = wave.ravel().astype(np.float32), None
     else:
         assert self.bit == 16
         encoded_coarse, encoded_fine = encode_16bit(wave)
         coarse = decode_single(encoded_coarse).astype(np.float32)
         # The fine input is one element shorter than the coarse input.
         fine = decode_single(encoded_fine).astype(np.float32)[:-1]

     return {
         "coarse": coarse,
         "fine": fine,
         "encoded_coarse": encoded_coarse,
         "encoded_fine": encoded_fine,
         "local": local,
         "silence": silence[1:],
     }
Example #3
0
    def main_forward(
        self,
        length: int,
        sampling_policy: SamplingPolicy,
        num_generate: int,
        local_array: np.ndarray = None,
        s_one: np.ndarray = None,
    ):
        """Autoregressively generate ``num_generate`` waves of ``length`` steps.

        Three generation paths exist:

        * ``self.use_cpp_inference`` and ``SamplingPolicy.random``: the
          ``yukarin_autoreg_cpp`` extension, which is expected to fill the
          pre-allocated ``(length, num_generate)`` int32 buffer in place.
        * ``SamplingPolicy.random`` without the extension: ``fast_generate``
          using the parameters from ``get_fast_forward_params``.
        * ``SamplingPolicy.maximum``: a step-by-step loop over
          ``self.model.forward_one`` followed by ``self.model.sampling`` with
          ``maximum=True``.

        Args:
            length: Number of steps to generate.
            sampling_policy: How each step's value is picked.
            num_generate: Number of waves generated in parallel.
            local_array: Local conditioning input. It is passed through
                ``self.model.forward_encode`` when ``self.model.with_local``
                is set, and is indexed as ``local_array[:, i]`` per step
                (presumably (batch, time, feature) - confirm with callers).
            s_one: Forwarded to ``forward_encode`` as ``s_one``.

        Returns:
            A list of ``Wave`` objects, one per row of the decoded output.

        Raises:
            ValueError: If ``sampling_policy`` is neither ``random`` nor
                ``maximum``.
        """
        if self.model.with_local:
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                local_array = self.model.forward_encode(l_array=local_array,
                                                        s_one=s_one).data

        # Every sequence starts from the encoded value of a zero sample.
        c = self.xp.zeros([num_generate], dtype=np.float32)
        c = encode_single(c, bit=self.single_bit)

        hidden_coarse = self.model.gru.init_hx(local_array)[0].data

        is_random = sampling_policy == SamplingPolicy.random

        if self.use_cpp_inference and is_random:
            # Imported lazily so the extension is only needed on this path.
            import yukarin_autoreg_cpp

            wave = np.zeros((length, num_generate), dtype=np.int32)
            yukarin_autoreg_cpp.inference(
                batch_size=num_generate,
                length=length,
                output=wave,
                x=to_numpy(c),
                l_array=to_numpy(self.xp.transpose(local_array, (1, 0, 2))),
                hidden=to_numpy(hidden_coarse),
            )
        else:
            if is_random:
                fast_forward_params = get_fast_forward_params(self.model)
                w_list = fast_generate(
                    length=length,
                    x=c,
                    l_array=local_array,
                    h=hidden_coarse,
                    **fast_forward_params,
                )
            else:
                # Reject unsupported policies before any model work. Random
                # sampling never reaches this branch, so only `maximum` is
                # valid here.
                if sampling_policy != SamplingPolicy.maximum:
                    raise ValueError(sampling_policy)

                w_list = []
                hc = hidden_coarse
                for i in tqdm(range(length), desc="generate"):
                    with chainer.using_config("train",
                                              False), chainer.using_config(
                                                  "enable_backprop", False):
                        c, hc = self.model.forward_one(
                            prev_x=c,
                            prev_l=local_array[:, i],
                            hidden=hc,
                        )

                    c = self.model.sampling(c, maximum=True)
                    w_list.append(c)

            wave = self.xp.stack(w_list)
            wave = cuda.to_cpu(wave)

        # (length, batch) -> (batch, length), then codes -> float samples.
        wave = wave.T
        wave = decode_single(wave, bit=self.single_bit)
        if self.mulaw:
            wave = decode_mulaw(wave, mu=2**self.single_bit)

        return [
            Wave(wave=w_one, sampling_rate=self.sampling_rate)
            for w_one in wave
        ]
Example #4
0
 def test_decode_one_value(self):
     """Scalar codes decode to the range ends with the default bit depth.

     Code 0 must decode to -1 and code 255 to 1 when ``decode_single`` is
     called without an explicit ``bit`` argument.
     """
     for code, expected in ((0, -1), (255, 1)):
         self.assertEqual(decode_single(code), expected)
Example #5
0
def main():
    """Plot the predicted coarse distribution of one sample against the truth.

    Steps:
      1. Read the run options from the module-level ``arguments`` object.
      2. Build the predictor from the config and load its saved weights,
         moving it to the GPU if one was requested.
      3. Pick one wave/local-feature pair: both sorted path lists are
         shuffled with the same seed and the first entry of each is taken.
      4. Trim both to a whole number of local frames, run the model on the
         16-bit encoded wave and apply softmax to the coarse output.
      5. Draw the coarse probabilities as an image, overlay the true and
         predicted curves, and save the figure to ``output.eps``.
    """
    model_dir: Path = arguments.model_dir
    model_iteration: int = arguments.model_iteration
    model_config: Path = arguments.model_config
    time_length: float = arguments.time_length
    gpu: int = arguments.gpu

    config = create_config(model_config)
    model_path = _get_predictor_model_path(model_dir, model_iteration)

    sr = config.dataset.sampling_rate

    # Build the network and restore the trained parameters.
    model = create_predictor(config.model)
    chainer.serializers.load_npz(str(model_path), model)
    if gpu is not None:
        model.to_gpu(gpu)
        cuda.get_device_from_id(gpu).use()

    # Inference only: switch off training behaviour and graph construction.
    chainer.global_config.train = False
    chainer.global_config.enable_backprop = False

    wave_paths = sorted(
        Path(p) for p in glob.glob(str(config.dataset.input_wave_glob)))
    local_paths = sorted(
        Path(p) for p in glob.glob(str(config.dataset.input_local_glob)))
    assert len(wave_paths) == len(local_paths)

    # Identically seeded shuffles apply the same permutation to both lists.
    for paths in (wave_paths, local_paths):
        np.random.RandomState(config.dataset.seed).shuffle(paths)

    wave_data = Wave.load(wave_paths[0], sampling_rate=sr)
    local_data = SamplingData.load(local_paths[0])

    # Round the requested duration down to a whole number of local frames.
    samples_per_frame = int(sr // local_data.rate)
    num_frames = int(sr * time_length) // samples_per_frame
    num_samples = num_frames * samples_per_frame

    true_wave = wave_data.wave[:num_samples]
    local_feature = local_data.array[:num_frames]
    coarse, fine = encode_16bit(true_wave)

    xp = model.xp
    coarse_input = decode_single(xp.asarray(coarse)).astype(np.float32)
    fine_input = decode_single(xp.asarray(fine)).astype(np.float32)[:-1]
    local_input = xp.asarray(local_feature)

    # A leading batch axis of size one is added to every input.
    coarse_out, fine_out, _hidden_coarse, _hidden_fine = model(
        c_array=coarse_input[np.newaxis],
        f_array=fine_input[np.newaxis],
        l_array=local_input[np.newaxis],
    )

    coarse_out = chainer.functions.softmax(coarse_out)

    coarse_prob = chainer.cuda.to_cpu(coarse_out[0].data)
    fine_score = chainer.cuda.to_cpu(fine_out[0].data)

    fig = plt.figure(figsize=[32 * time_length, 10])

    plt.imshow(coarse_prob, aspect='auto', interpolation='nearest')
    plt.colorbar()

    # The true wave is rescaled with (w + 1) * 127.5 to match the image axis.
    plt.plot((true_wave + 1) * 127.5, 'g', linewidth=0.1, label='true')
    predicted = (np.argmax(coarse_prob, axis=0)
                 + np.argmax(fine_score, axis=0) / 256)
    plt.plot(predicted, 'r', linewidth=0.1, label='predicted')
    plt.legend()

    fig.savefig('output.eps')
Example #6
0
    def generate(
        self,
        time_length: Optional[float],
        sampling_policy: SamplingPolicy,
        num_generate: int,
        coarse=None,
        local_array: np.ndarray = None,
        speaker_nums: List[int] = None,
        hidden_coarse=None,
    ):
        """Generate waves with the ``yukarin_autoreg_cpp`` inference extension.

        Args:
            time_length: Duration to generate; multiplied by
                ``self.sampling_rate`` to get the number of steps.
            sampling_policy: Must be ``SamplingPolicy.random``.
            num_generate: Batch size; at most ``self.max_batch_size``.
            coarse: Optional initial input, one entry per generated wave.
                Defaults to the encoded value of a zero sample.
            local_array: Optional local conditioning input, one entry per
                generated wave. An empty ``(num_generate, length, 0)`` array
                is used when omitted.
            speaker_nums: Optional speaker ids, one per generated wave,
                passed through ``self.model.forward_speaker``.
            hidden_coarse: Optional initial hidden state, one entry per
                generated wave. Defaults to ``self.model.gru.init_hx``.

        Returns:
            A list of ``Wave`` objects, one per row of the decoded output.
        """
        assert num_generate <= self.max_batch_size
        # Each optional per-wave input must match the batch size.
        for per_wave in (coarse, local_array, speaker_nums, hidden_coarse):
            assert per_wave is None or len(per_wave) == num_generate
        assert sampling_policy == SamplingPolicy.random

        length = int(self.sampling_rate * time_length)

        if local_array is not None:
            local_array = self.xp.asarray(local_array)
        else:
            local_array = self.xp.empty((num_generate, length, 0),
                                        dtype=np.float32)

        s_one = None
        if speaker_nums is not None:
            speaker_nums = self.xp.asarray(speaker_nums).reshape((-1, ))
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                s_one = self.model.forward_speaker(speaker_nums).data

        if self.model.with_local:
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                local_array = self.model.forward_encode(l_array=local_array,
                                                        s_one=s_one).data

        if coarse is not None:
            c = coarse
        else:
            zero_wave = self.xp.zeros([num_generate], dtype=np.float32)
            c = encode_single(zero_wave, bit=self.single_bit)

        if hidden_coarse is None:
            hidden_coarse = self.model.gru.init_hx(local_array)[0].data

        # Output buffer handed to the extension; it is read back afterwards.
        wave = np.zeros((length, num_generate), dtype=np.int32)
        initial_input = to_numpy(c)
        local_time_major = to_numpy(self.xp.transpose(local_array, (1, 0, 2)))
        initial_hidden = to_numpy(hidden_coarse)
        yukarin_autoreg_cpp.inference(
            batch_size=num_generate,
            length=length,
            output=wave,
            x=initial_input,
            l_array=local_time_major,
            hidden=initial_hidden,
        )

        # (length, batch) -> (batch, length), then codes -> float samples.
        wave = decode_single(wave.T, bit=self.single_bit)
        if self.mulaw:
            wave = decode_mulaw(wave, mu=2**self.single_bit)

        return [
            Wave(wave=w_one, sampling_rate=self.sampling_rate)
            for w_one in wave
        ]