Code example #1
0
    def main_forward(
        self,
        length: int,
        sampling_policy: SamplingPolicy,
        num_generate: int,
        local_array: np.ndarray = None,
        s_one: np.ndarray = None,
    ):
        """Generate waveforms autoregressively for a batch of samples.

        Args:
            length: number of samples to generate per waveform.
            sampling_policy: how to draw each next sample (random or maximum).
            num_generate: batch size (number of waveforms to generate).
            local_array: optional local conditioning features.
            s_one: optional speaker embedding forwarded to the encoder.

        Returns:
            list of ``Wave`` objects, one per generated waveform.

        Raises:
            ValueError: if ``sampling_policy`` is not a supported policy.
        """
        # Validate the policy once, up front, instead of re-checking it on
        # every loop iteration: an unknown policy now fails fast before any
        # expensive model call (previously one forward_one ran before the
        # ValueError was raised).
        if sampling_policy == SamplingPolicy.random:
            is_random = True
        elif sampling_policy == SamplingPolicy.maximum:
            is_random = False
        else:
            raise ValueError(sampling_policy)

        if self.model.with_local:
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                local_array = self.model.forward_encode(l_array=local_array,
                                                        s_one=s_one).data

        # Seed input: silence encoded into the model's discrete domain.
        c = self.xp.zeros([num_generate], dtype=np.float32)
        c = encode_single(c, bit=self.single_bit)

        hidden_coarse = self.model.gru.init_hx(local_array)[0].data

        if self.use_cpp_inference and is_random:
            # Fast path: native C++ inference kernel (random sampling only).
            import yukarin_autoreg_cpp

            wave = np.zeros((length, num_generate), dtype=np.int32)
            yukarin_autoreg_cpp.inference(
                batch_size=num_generate,
                length=length,
                output=wave,
                x=to_numpy(c),
                # The kernel appears to expect time-major layout
                # (time, batch, feature) — hence the transpose.
                l_array=to_numpy(self.xp.transpose(local_array, (1, 0, 2))),
                hidden=to_numpy(hidden_coarse),
            )
        else:
            if is_random:
                fast_forward_params = get_fast_forward_params(self.model)
                w_list = fast_generate(
                    length=length,
                    x=c,
                    l_array=local_array,
                    h=hidden_coarse,
                    **fast_forward_params,
                )
            else:
                # Slow per-sample Python loop (deterministic "maximum" path).
                w_list = []
                hc = hidden_coarse
                for i in tqdm(range(length), desc="generate"):
                    with chainer.using_config("train",
                                              False), chainer.using_config(
                                                  "enable_backprop", False):
                        c, hc = self.model.forward_one(
                            prev_x=c,
                            prev_l=local_array[:, i],
                            hidden=hc,
                        )

                    c = self.model.sampling(c, maximum=not is_random)
                    w_list.append(c)

            wave = self.xp.stack(w_list)
            wave = cuda.to_cpu(wave)

        # (time, batch) -> (batch, time), then decode back to signal space.
        wave = wave.T
        wave = decode_single(wave, bit=self.single_bit)
        if self.mulaw:
            wave = decode_mulaw(wave, mu=2**self.single_bit)

        return [
            Wave(wave=w_one, sampling_rate=self.sampling_rate)
            for w_one in wave
        ]
Code example #2
0
    def generate(
        self,
        time_length: float,
        sampling_policy: SamplingPolicy,
        num_generate: int,
        local_array: Union[numpy.ndarray, Tensor] = None,
        speaker_nums: Union[Sequence[int], Tensor] = None,
    ):
        """Generate waveforms autoregressively with the PyTorch predictor.

        Three execution paths are used, chosen by configuration:
          * GPU fast path through the ``yukarin_autoreg_cpp`` kernel
            (random sampling only),
          * CPU fast path through ``fast_generate`` (random sampling only),
          * per-sample Python loop supporting maximum / random /
            corrected_random policies.

        Args:
            time_length: duration in seconds; converted to a sample count
                via ``self.sampling_rate``.
            sampling_policy: how each next sample is drawn.
            num_generate: batch size (number of waveforms).
            local_array: optional local conditioning, one row per waveform.
            speaker_nums: optional speaker ids, one per waveform.

        Returns:
            list of ``Wave`` objects, one per generated waveform.
        """
        assert num_generate <= self.max_batch_size
        assert local_array is None or len(local_array) == num_generate
        assert speaker_nums is None or len(speaker_nums) == num_generate

        length = int(self.sampling_rate * time_length)

        # No conditioning supplied: use an empty (batch, time, 0) placeholder
        # so downstream code can index it uniformly.
        if local_array is None:
            local_array = torch.empty((num_generate, length, 0)).float()
        local_array = to_tensor(local_array).to(self.device)

        # Speaker embedding (only when speaker ids were given).
        if speaker_nums is not None:
            speaker_nums = to_tensor(speaker_nums).reshape(
                (-1, )).to(self.device)
            with torch.no_grad():
                s_one = self.predictor.forward_speaker(speaker_nums)
        else:
            s_one = None

        if self.predictor.with_local:
            with torch.no_grad():
                local_array = self.predictor.forward_encode(
                    l_array=local_array, s_one=s_one)

        # Seed input: silence encoded into the model's discrete domain.
        x = numpy.zeros(num_generate, dtype=numpy.float32)
        x = encode_single(x, bit=self.bit_size)

        # Initial GRU hidden state: zeros.
        hidden = numpy.zeros(
            (num_generate, self.predictor.gru.hidden_size),
            dtype=numpy.float32,
        )

        # corrected_random penalizes samples whose greedy-path log-probability
        # falls below this threshold; -999 makes the correction a no-op for
        # the other policies.
        if sampling_policy == SamplingPolicy.corrected_random:
            low_probability_threshold = -18
        else:
            low_probability_threshold = -999

        if self.use_fast_inference and self.use_gpu:
            # GPU fast path: the C++ kernel only implements random sampling.
            assert sampling_policy in [
                SamplingPolicy.random,
            ]

            import yukarin_autoreg_cpp

            wave = numpy.zeros((length, num_generate), dtype=numpy.int32)
            yukarin_autoreg_cpp.inference(
                batch_size=num_generate,
                length=length,
                output=wave,
                x=x.astype(numpy.int32),
                # NOTE(review): transpose(0, 1) yields time-major layout —
                # presumably what the kernel expects; confirm against the
                # kernel's interface.
                l_array=to_numpy(local_array.transpose(0, 1)),
                hidden=to_numpy(hidden),
            )

        elif self.use_fast_inference and not self.use_gpu:
            # CPU fast path: random sampling only.
            assert sampling_policy == SamplingPolicy.random

            params = get_fast_forward_params(self.predictor)
            x_list = fast_generate(
                length=length,
                x=x,
                l_array=local_array.numpy(),
                h=hidden,
                **params,
            )
            wave = numpy.stack(x_list)
        else:
            # Generic per-sample loop; supports all sampling policies.
            with torch.no_grad():
                x = to_tensor(x).to(self.device)
                x_max = x
                hidden = to_tensor(hidden).to(self.device)
                x_list = []
                for i in tqdm(range(length), desc="generate"):
                    # Two forwards per step: d_max follows the greedy
                    # (argmax) trajectory and is used only to detect
                    # low-probability samples; its hidden state is discarded.
                    d_max, _ = self.predictor.forward_one(
                        prev_x=x_max, prev_l=local_array[:, i], hidden=hidden)
                    d, hidden = self.predictor.forward_one(
                        prev_x=x, prev_l=local_array[:, i], hidden=hidden)

                    if sampling_policy == SamplingPolicy.maximum:
                        is_random = False
                    else:
                        is_random = True
                        # Suppress logits that the greedy path deems very
                        # unlikely (effective only for corrected_random,
                        # since otherwise the threshold is -999).
                        d[F.log_softmax(d_max.double(), dim=1) <
                          low_probability_threshold] -= 200

                    x = self.predictor.sampling(d, maximum=not is_random)
                    x_max = self.predictor.sampling(d, maximum=True)
                    x_list.append(x)

                wave = torch.stack(x_list).cpu().numpy()

        # (time, batch) -> (batch, time), then decode back to signal space.
        wave = wave.T
        wave = decode_single(wave, bit=self.bit_size)
        if self.mulaw:
            wave = decode_mulaw(wave, mu=2**self.bit_size)

        return [
            Wave(wave=w_one, sampling_rate=self.sampling_rate)
            for w_one in wave
        ]
Code example #3
0
# Sanity check for the C++ inference kernel: run the same inputs at several
# batch sizes and verify that the shared leading batch entries produce
# identical outputs regardless of batch size.
before_output = None
for batch_size in [1, 2, 4]:
    x = base_x[:batch_size].clone()
    l_array = base_l_array[:, :batch_size].clone()
    hidden = base_hidden[:batch_size].clone()

    # Pre-fill with -1 so any entries the kernel never wrote are obvious.
    output = numpy.ones((length, batch_size), dtype=numpy.int32) * -1
    yukarin_autoreg_cpp.inference(
        batch_size=batch_size,
        length=length,
        output=output,
        x=to_numpy(x),
        l_array=to_numpy(l_array),
        hidden=to_numpy(hidden),
    )
    print(output)

    # Compare against the previous (smaller-batch) run on the overlapping
    # batch columns.
    if before_output is not None:
        min_batch_size = min(before_output.shape[1], output.shape[1])
        flag = numpy.all(
            before_output[:, :min_batch_size] == output[:, :min_batch_size])
        print("before_output == output :", flag)
    before_output = output

with torch.no_grad():
    expected = torch.stack(
        fast_generate(
Code example #4
0
    def generate(
        self,
        time_length: Optional[float],
        sampling_policy: SamplingPolicy,
        num_generate: int,
        coarse=None,
        local_array: np.ndarray = None,
        speaker_nums: List[int] = None,
        hidden_coarse=None,
    ):
        """Generate waveforms through the C++ inference kernel.

        Only ``SamplingPolicy.random`` is supported on this path.

        Returns:
            list of ``Wave`` objects, one per generated waveform.
        """
        assert num_generate <= self.max_batch_size
        assert coarse is None or len(coarse) == num_generate
        assert local_array is None or len(local_array) == num_generate
        assert speaker_nums is None or len(speaker_nums) == num_generate
        assert hidden_coarse is None or len(hidden_coarse) == num_generate
        assert sampling_policy == SamplingPolicy.random

        length = int(self.sampling_rate * time_length)

        # Conditioning features: an empty (batch, time, 0) placeholder when
        # none were supplied, otherwise moved onto the compute device.
        if local_array is None:
            local_array = self.xp.empty((num_generate, length, 0),
                                        dtype=np.float32)
        else:
            local_array = self.xp.asarray(local_array)

        # Speaker embedding, only when speaker ids were given.
        s_one = None
        if speaker_nums is not None:
            speaker_nums = self.xp.asarray(speaker_nums).reshape((-1, ))
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                s_one = self.model.forward_speaker(speaker_nums).data

        if self.model.with_local:
            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                local_array = self.model.forward_encode(l_array=local_array,
                                                        s_one=s_one).data

        # Seed sample: caller-provided coarse signal, or encoded silence.
        if coarse is not None:
            c = coarse
        else:
            c = encode_single(self.xp.zeros([num_generate], dtype=np.float32),
                              bit=self.single_bit)

        if hidden_coarse is None:
            hidden_coarse = self.model.gru.init_hx(local_array)[0].data

        wave = np.zeros((length, num_generate), dtype=np.int32)
        yukarin_autoreg_cpp.inference(
            batch_size=num_generate,
            length=length,
            output=wave,
            x=to_numpy(c),
            # kernel expects time-major layout: (time, batch, feature)
            l_array=to_numpy(self.xp.transpose(local_array, (1, 0, 2))),
            hidden=to_numpy(hidden_coarse),
        )

        # (time, batch) -> (batch, time), then decode to the signal domain.
        wave = decode_single(wave.T, bit=self.single_bit)
        if self.mulaw:
            wave = decode_mulaw(wave, mu=2**self.single_bit)

        return [
            Wave(wave=w, sampling_rate=self.sampling_rate) for w in wave
        ]