コード例 #1
0
def encode_worker(
    config: Config,
    wrapper: VoiceChangerStreamWrapper,
    audio_config: AudioConfig,
    queue_input: Queue,
    queue_output: Queue,
):
    """
    Endless worker loop: take items from ``queue_input``, encode, forward.

    A ``Vocoder`` is attached to the wrapped stream, one chunk of silence is
    fed as initial padding, and then for every item taken from
    ``queue_input`` the wave stored in ``item.item`` is appended to the
    stream, ``wrapper.pre_convert_next`` is called for one chunk, its result
    replaces ``item.item`` and the item is put on ``queue_output``.

    ``item.original`` is exchanged with the value carried by the previous
    item (the silent padding for the first one), so the forwarded
    ``original`` lags one item behind.

    This function never returns.
    """
    wrapper.voice_changer_stream.vocoder = Vocoder(
        acoustic_param=config.dataset.acoustic_param,
        out_sampling_rate=audio_config.out_rate,
    )

    # duration of one input chunk, in seconds
    chunk_seconds = audio_config.in_audio_chunk / audio_config.in_rate

    def feed(samples, offset):
        """Append ``samples`` at ``offset`` seconds; return the next offset."""
        chunk = Wave(wave=samples, sampling_rate=audio_config.in_rate)
        wrapper.voice_changer_stream.add_wave(start_time=offset, wave=chunk)
        return offset + chunk_seconds

    # initial padding: one chunk of silence (not a fixed one second)
    silence = numpy.zeros(
        round(chunk_seconds * audio_config.in_rate),
        dtype=numpy.float32,
    )
    previous_original = silence
    position = feed(silence, 0)

    while True:
        item: Item = queue_input.get()

        # hand out the previous original and remember the current one
        current_original = item.original
        item.original = previous_original
        previous_original = current_original

        position = feed(item.item, position)
        item.item = wrapper.pre_convert_next(time_length=chunk_seconds)
        queue_output.put(item)
コード例 #2
0
 def separate_effective(self,
                        wave: Wave,
                        feature: AcousticFeature,
                        threshold=None):
     """
     Index ``feature`` with per-frame "effective" flags computed from ``wave``.

     :param wave: wave the flags are computed from
     :param feature: acoustic feature to be indexed by the flags
     :param threshold: when None, the flags come from
         ``wave.get_effective_frame`` using ``self._param.threshold_db``
         (or are all True, leaving ``feature`` untouched, if that is None
         too); otherwise a frame is flagged when its power in dB is greater
         than ``-threshold``
     :return: (effective feature, effective flags)
     """
     hop, length = wave.get_hop_and_length(
         frame_period=self._param.frame_period)

     if threshold is None:
         if self._param.threshold_db is None:
             # no thresholding configured: every frame counts as effective
             return feature, numpy.ones(length, dtype=bool)

         effective = wave.get_effective_frame(
             threshold_db=self._param.threshold_db,
             fft_length=self._param.fft_length,
             frame_period=self._param.frame_period,
         )
         return feature.indexing(effective), effective

     # NOTE(review): newer librosa releases expose this as
     # ``librosa.feature.rms`` -- confirm the pinned librosa version.
     mse = librosa.feature.rmse(y=wave.wave,
                                frame_length=self._param.fft_length,
                                hop_length=hop)**2
     effective = (librosa.core.power_to_db(mse.squeeze()) > -threshold)

     # The frame count of the energy curve can differ slightly from the
     # feature's, so align the flags with the feature length.
     n_frames = len(feature.f0)
     shortage = n_frames - len(effective)
     if shortage > 0:
         # pad with "not effective"; at most two frames, as before
         padding = numpy.zeros(min(shortage, 2), dtype=bool)
         effective = numpy.r_[effective, padding]
     elif shortage < 0:
         # previously a no-op (``effective = effective``): drop the surplus
         effective = effective[:n_frames]

     return feature.indexing(effective), effective
コード例 #3
0
def encode_worker(
    config: Config,
    wrapper: VoiceChangerStreamWrapper,
    audio_config: AudioConfig,
    queue_input: Queue,
    queue_output: Queue,
):
    """
    Endless worker loop: take waves from ``queue_input``, encode, forward.

    A ``Vocoder`` is attached to the wrapped stream and one chunk of silence
    is fed first. Afterwards every array taken from ``queue_input`` is
    appended to the stream and the result of ``wrapper.pre_convert_next``
    for one chunk is put on ``queue_output``.

    This function never returns.
    """
    wrapper.voice_changer_stream.vocoder = Vocoder(
        acoustic_param=config.dataset.acoustic_param,
        out_sampling_rate=audio_config.rate,
    )

    # duration of one conversion chunk, in seconds
    chunk_seconds = audio_config.convert_chunk / audio_config.rate
    elapsed = 0

    # leading chunk of silence
    silence = numpy.zeros(
        round(chunk_seconds * audio_config.rate),
        dtype=numpy.float32,
    )
    leading = Wave(wave=silence, sampling_rate=audio_config.rate)
    wrapper.voice_changer_stream.add_wave(start_time=elapsed, wave=leading)
    elapsed += chunk_seconds

    while True:
        samples = queue_input.get()

        chunk = Wave(wave=samples, sampling_rate=audio_config.rate)
        wrapper.voice_changer_stream.add_wave(start_time=elapsed, wave=chunk)
        elapsed += chunk_seconds

        encoded = wrapper.pre_convert_next(time_length=chunk_seconds)
        queue_output.put(encoded)
コード例 #4
0
def generate_feature(path: Path):
    """
    Extract the acoustic feature of one audio file and save it as ``.npy``.

    Settings are read from the module-level ``arguments``. Nothing is done
    when the output file already exists and overwriting is not enabled.
    When ``arguments.threshold_db`` is set, the feature is indexed by the
    effective-frame flags of the wave, optionally computed on a copy loaded
    at ``arguments.sampling_rate_for_thresholding``.
    """
    out = Path(arguments.output, path.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    def load_padded(sampling_rate):
        """Load ``path`` at ``sampling_rate`` and pad both ends."""
        loaded = Wave.load(path=path, sampling_rate=sampling_rate)
        return loaded.pad(pre_second=arguments.pad_second,
                          post_second=arguments.pad_second)

    wave = load_padded(arguments.sampling_rate)

    extract_options = dict(
        frame_period=arguments.frame_period,
        f0_floor=arguments.f0_floor,
        f0_ceil=arguments.f0_ceil,
        fft_length=arguments.fft_length,
        order=arguments.order,
        alpha=arguments.alpha,
        dtype=arguments.dtype,
    )
    feature = AcousticFeature.extract(wave=wave, **extract_options)

    if arguments.threshold_db is not None:
        # the flags may be computed on a differently sampled copy of the file
        if arguments.sampling_rate_for_thresholding is None:
            wave_ref = wave
        else:
            wave_ref = load_padded(arguments.sampling_rate_for_thresholding)

        effective = wave_ref.get_effective_frame(
            threshold_db=arguments.threshold_db,
            fft_length=arguments.fft_length,
            frame_period=arguments.frame_period,
        )

        # The two waves can disagree by one frame, see
        # https://github.com/mmorise/World/blob/c41e580c24c8d360f322ba6e2092ad4785d2d5b9/src/harvest.cpp#L1220
        n_frames = wave.get_hop_and_length(arguments.frame_period)[1]
        n_frames_ref = wave_ref.get_hop_and_length(arguments.frame_period)[1]
        if n_frames_ref - 1 == n_frames:
            effective = effective[:-1]

        feature = feature.indexing(effective)

    feature.save(path=out, ignores=arguments.ignore_feature)
コード例 #5
0
def generate_feature(path: Path):
    """
    Extract the acoustic feature of one audio file and save it as ``.npy``.

    Settings are read from the module-level ``arguments``. Nothing is done
    when the output file already exists and overwriting is not enabled.
    When ``arguments.threshold_db`` is set, the feature is indexed by the
    wave's effective-frame flags before it is saved with validation on.
    """
    out = Path(arguments.output, path.stem + '.npy')
    if out.exists() and not arguments.enable_overwrite:
        return

    # load the wave, then pad both ends by the same amount
    loaded = Wave.load(path=path, sampling_rate=arguments.sampling_rate)
    wave = loaded.pad(pre_second=arguments.pad_second,
                      post_second=arguments.pad_second)

    extract_options = dict(
        frame_period=arguments.frame_period,
        f0_floor=arguments.f0_floor,
        f0_ceil=arguments.f0_ceil,
        fft_length=arguments.fft_length,
        order=arguments.order,
        alpha=arguments.alpha,
        dtype=arguments.dtype,
    )
    feature = AcousticFeature.extract(wave=wave, **extract_options)

    if arguments.threshold_db is not None:
        flags = wave.get_effective_frame(
            threshold_db=arguments.threshold_db,
            fft_length=arguments.fft_length,
            frame_period=arguments.frame_period,
        )
        feature = feature.indexing(flags)

    feature.save(path=out, validate=True, ignores=arguments.ignore_feature)
コード例 #6
0
 def concatenate_wrapper(fs: List['AcousticFeatureWrapper'],
                         keys: Iterable[str]):
     """
     Join several wrappers into a single one, in the order given.

     The wave samples of all elements of ``fs`` are concatenated and the
     features are merged by ``AcousticFeatureWrapper.concatenate`` for
     ``keys``. The sampling rate is taken from the first element of ``fs``.
     """
     samples = numpy.concatenate([item.wave.wave for item in fs])
     joined_wave = Wave(wave=samples, sampling_rate=fs[0].wave.sampling_rate)
     joined_feature = AcousticFeatureWrapper.concatenate(fs, keys=keys)
     return AcousticFeatureWrapper(wave=joined_wave,
                                   **joined_feature.__dict__)
コード例 #7
0
ファイル: acoustic_converter.py プロジェクト: BURI55/yukarin
 def decode_acoustic_feature(self, feature: AcousticFeature):
     """
     Synthesize a ``Wave`` from ``feature`` with ``pyworld.synthesize``.

     ``feature.f0`` is flattened before the call; the output sampling rate
     is ``self.out_sampling_rate`` and the frame period comes from
     ``self._param``.
     """
     synthesis_options = dict(
         f0=feature.f0.ravel(),
         spectrogram=feature.sp,
         aperiodicity=feature.ap,
         fs=self.out_sampling_rate,
         frame_period=self._param.frame_period,
     )
     samples = pyworld.synthesize(**synthesis_options)
     return Wave(samples, sampling_rate=self.out_sampling_rate)
コード例 #8
0
 def pick_wrapper(self, first: int, last: int, keys: Iterable[str],
                  frame_period: float):
     """
     Cut the frame range ``first``..``last`` out of this wrapper.

     The frame indices are converted to sample indices through
     ``frame_period`` (divided by 1000 here, i.e. treated as milliseconds)
     and the wave's sampling rate; the wave is sliced with those indices
     while the features are cut with ``self.pick`` for ``keys``.
     """
     rate = self.wave.sampling_rate
     begin, end = (
         round(frame * frame_period / 1000 * rate)
         for frame in (first, last)
     )
     cut_wave = Wave(wave=self.wave.wave[begin:end], sampling_rate=rate)
     cut_feature = self.pick(first, last, keys=keys)
     return AcousticFeatureWrapper(wave=cut_wave, **cut_feature.__dict__)
コード例 #9
0
ファイル: vocoder.py プロジェクト: RaiJPch/test
    def decode(
        self,
        acoustic_feature: AcousticFeature,
    ):
        """
        Synthesize a ``Wave`` from ``acoustic_feature`` with the synthesizer.

        The f0 / sp / ap arrays are converted to pointer buffers and handed
        to ``apidefinitions._AddParameters``. ``_Synthesis2`` is then called
        until it returns 0, and after each non-zero return the synthesizer's
        output buffer is copied. The copies are concatenated into the result;
        if nothing was produced, the returned wave is empty.

        Requires ``self._synthesizer`` to be set.
        """
        assert self._synthesizer is not None
        synthesizer = self._synthesizer

        n_frames = len(acoustic_feature.f0)
        f0_buffer = utils.cast_1d_list_to_1d_pointer(
            acoustic_feature.f0.flatten().tolist())
        sp_buffer = utils.cast_2d_list_to_2d_pointer(
            acoustic_feature.sp.tolist())
        ap_buffer = utils.cast_2d_list_to_2d_pointer(
            acoustic_feature.ap.tolist())
        apidefinitions._AddParameters(f0_buffer, n_frames, sp_buffer,
                                      ap_buffer, synthesizer)

        # copy the output buffer after every successful synthesis step
        chunks = []
        while apidefinitions._Synthesis2(synthesizer) != 0:
            chunks.append(numpy.array([
                synthesizer.buffer[index]
                for index in range(synthesizer.buffer_size)
            ]))

        samples = numpy.concatenate(chunks) if chunks else numpy.empty(0)
        out_wave = Wave(wave=samples, sampling_rate=self.out_sampling_rate)

        # Keep references to the most recent pointer buffers so their memory
        # stays alive (presumably still referenced by the synthesizer).
        self._before_buffer.append((f0_buffer, sp_buffer, ap_buffer))
        if len(self._before_buffer) > 16:
            self._before_buffer.pop(0)
        return out_wave
コード例 #10
0
ファイル: vocoder.py プロジェクト: RaiJPch/test
 def decode(
     self,
     acoustic_feature: AcousticFeature,
 ):
     """
     Synthesize a ``Wave`` from ``acoustic_feature`` with pyworld.

     The feature is first converted with ``astype_only_float(float64)``;
     the result is produced at ``self.out_sampling_rate``.
     """
     feature = acoustic_feature.astype_only_float(numpy.float64)
     synthesis_options = dict(
         f0=feature.f0.ravel(),
         spectrogram=feature.spectrogram,
         aperiodicity=feature.aperiodicity,
         fs=self.out_sampling_rate,
         frame_period=self.acoustic_param.frame_period,
     )
     samples = pyworld.synthesize(**synthesis_options)
     return Wave(samples, sampling_rate=self.out_sampling_rate)
コード例 #11
0
ファイル: encode_stream.py プロジェクト: RaiJPch/test
    def process(self, start_time: float, time_length: float, extra_time: float) -> AcousticFeatureWrapper:
        """
        Fetch a span of input samples and encode it with the vocoder.

        The samples returned by ``self.fetch`` are wrapped in a ``Wave`` at
        the input segment method's sampling rate and encoded. When
        ``extra_time`` corresponds to a positive margin at the output
        segment method's rate, that margin is cut from both ends of the
        encoded result with ``self.out_segment_method.pick``.
        """
        samples = self.fetch(
            start_time=start_time,
            time_length=time_length,
            extra_time=extra_time,
        )
        in_wave = Wave(wave=samples, sampling_rate=self.in_segment_method.sampling_rate)
        encoded = self.vocoder.encode(in_wave)

        margin = round(extra_time * self.out_segment_method.sampling_rate)
        if margin <= 0:
            # nothing to trim
            return encoded
        return self.out_segment_method.pick(encoded, margin, -margin)
コード例 #12
0
    def pre_convert(self, start_time: float, time_length: float, extra_time: float):
        """
        Fetch a span of input wave, encode it and trim the extra margin.

        :param start_time: forwarded to ``self.fetch``
        :param time_length: forwarded to ``self.fetch``
        :param extra_time: forwarded to ``self.fetch``; the matching margin
            is removed from both ends of the wave (in samples) and of the
            feature (in frames) after encoding
        :return: ``AcousticFeatureWrapper`` holding the trimmed wave and the
            trimmed feature restricted to f0 / ap / mc / voiced
        """
        keys = ['f0', 'ap', 'mc', 'voiced']
        wave = self.fetch(
            start_time=start_time,
            time_length=time_length,
            extra_time=extra_time,
            data_stream=self._data_stream,
            rate=self.sampling_rate,
            pad_function=lambda length: numpy.zeros(shape=length, dtype=self.in_dtype),
            pick_function=lambda segment, first, last: segment.wave.wave[first:last],
            concat_function=numpy.concatenate,
        )
        in_wave = Wave(wave=wave, sampling_rate=self.sampling_rate)
        in_feature = self.vocoder.encode(in_wave)

        # Margin in samples. A zero margin must be skipped: ``[0:-0]`` is
        # ``[0:0]`` and would discard the whole wave.
        pad = round(extra_time * self.sampling_rate)
        if pad != 0:
            in_wave.wave = in_wave.wave[pad:-pad]

        # Margin in frames (frame_period is divided by 1000 here, i.e.
        # treated as milliseconds). For a zero margin use the full frame
        # count as the end index instead of ``-0``.
        # NOTE(review): assumes ``pick`` accepts a non-negative end index
        # like a slice -- confirm against AcousticFeature.pick.
        pad = round(extra_time / (self.vocoder.acoustic_param.frame_period / 1000))
        last = -pad if pad != 0 else len(in_feature.f0)
        in_feature = in_feature.pick(pad, last, keys=keys)

        feature_wrapper = AcousticFeatureWrapper(wave=in_wave, **in_feature.__dict__)
        return feature_wrapper
コード例 #13
0
 def silent_wrapper(
     length: int,
     sizes: Dict[str, int],
     keys: Iterable[str],
     frame_period: float,
     sampling_rate: int,
     wave_dtype,
 ):
     """
     Build a wrapper holding a zero wave and a silent feature.

     ``length`` is a frame count: the feature part comes from
     ``AcousticFeatureWrapper.silent(length, sizes=..., keys=...)`` and the
     wave part is an all-zero array of dtype ``wave_dtype`` whose sample
     count is derived from ``length``, ``frame_period`` (divided by 1000
     here, i.e. treated as milliseconds) and ``sampling_rate``.
     """
     n_samples = round(length * frame_period / 1000 * sampling_rate)
     zero_wave = Wave(
         wave=numpy.zeros(shape=n_samples, dtype=wave_dtype),
         sampling_rate=sampling_rate,
     )
     silent_feature = AcousticFeatureWrapper.silent(length, sizes=sizes,
                                                    keys=keys)
     return AcousticFeatureWrapper(wave=zero_wave, **silent_feature.__dict__)
コード例 #14
0
    def post_convert(self, start_time: float, time_length: float):
        """
        Fetch converted features for a time span and decode them to a wave.

        Features are gathered from ``self._out_feature_stream`` via
        ``self.fetch`` (f0 / ap / sp / voiced), decoded by the vocoder, and
        every NaN sample of the decoded wave is replaced by 0 (in place on
        the decoded array) before a new ``Wave`` is returned.
        """
        keys = ['f0', 'ap', 'sp', 'voiced']
        sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order)

        def make_silence(length):
            return AcousticFeature.silent(length, sizes=sizes, keys=keys)

        def cut_segment(segment, first, last):
            return segment.feature.pick(first, last, keys=keys)

        def join(buffers):
            return AcousticFeature.concatenate(buffers, keys=keys)

        out_feature = self.fetch(
            start_time=start_time,
            time_length=time_length,
            data_stream=self._out_feature_stream,
            rate=1000 / self.frame_period,
            pad_function=make_silence,
            pick_function=cut_segment,
            concat_function=join,
        )

        decoded = self.vocoder.decode(acoustic_feature=out_feature)

        # silence any NaN produced by the decoder
        samples = decoded.wave
        samples[numpy.isnan(samples)] = 0
        return Wave(wave=samples, sampling_rate=decoded.sampling_rate)
コード例 #15
0
 def astype_only_float_wrapper(self, dtype):
     """
     Return a new wrapper with the wave and the features cast to ``dtype``.

     The wave samples are converted with ``astype(dtype)`` and the features
     with ``self.astype_only_float(dtype)``; the sampling rate is kept.
     """
     cast_wave = Wave(
         wave=self.wave.wave.astype(dtype),
         sampling_rate=self.wave.sampling_rate,
     )
     cast_feature = self.astype_only_float(dtype)
     return AcousticFeatureWrapper(wave=cast_wave, **cast_feature.__dict__)
コード例 #16
0
# Excerpt of a larger script: voice_changer_stream, voice_changer, vocoder,
# audio_config, test_data_path and frame_period are defined before this point.

# Attach the converter and the vocoder to the stream before wrapping it.
voice_changer_stream.voice_changer = voice_changer
voice_changer_stream.vocoder = vocoder

# NOTE(review): extra_time_pre / extra_time are presumably margins in
# seconds used by the wrapper -- confirm against VoiceChangerStreamWrapper.
wrapper = VoiceChangerStreamWrapper(
    voice_changer_stream=voice_changer_stream,
    extra_time_pre=0.2,
    extra_time=0.1,
)

# Load the test audio at audio_config.rate; the returned rate is discarded.
raw_wave, _ = librosa.load(str(test_data_path), sr=audio_config.rate)
wave_out_list = []

# Stage 1: feed the whole recording into the stream, one chunk at a time.
# start_time advances by the real duration of each chunk, so a shorter
# final chunk is accounted for.
start_time = 0
for i in range(0, len(raw_wave), audio_config.chunk):
    wave_in = Wave(wave=raw_wave[i:i + audio_config.chunk], sampling_rate=audio_config.rate)
    wrapper.voice_changer_stream.add_wave(start_time=start_time, wave=wave_in)
    start_time += len(wave_in.wave) / wave_in.sampling_rate

# Stage 2: pre-convert chunk by chunk and register each result as an input
# feature at its start time.
# NOTE(review): this runs len // chunk + 1 times, which is one more
# iteration than stage 1 when len(raw_wave) is an exact multiple of the
# chunk size -- confirm that is intended.
start_time = 0
for i in range(len(raw_wave) // audio_config.chunk + 1):
    feature_in = wrapper.pre_convert_next(time_length=audio_config.chunk / audio_config.rate)
    wrapper.voice_changer_stream.add_in_feature(
        start_time=start_time,
        feature_wrapper=feature_in,
        frame_period=frame_period,
    )
    start_time += audio_config.chunk / audio_config.rate
    print('pre', i, flush=True)

# Reset the clock for the next stage (not visible in this excerpt).
start_time = 0
コード例 #17
0
ファイル: vocoder.py プロジェクト: RaiJPch/test
 def warm_up(self, time_length: float):
     """
     Run one encode/decode round trip on silence.

     A zero wave of ``time_length`` seconds at ``self.out_sampling_rate``
     is encoded and the result decoded; both results are discarded.
     """
     n_samples = int(time_length * self.out_sampling_rate)
     silence = Wave(wave=numpy.zeros(n_samples),
                    sampling_rate=self.out_sampling_rate)
     self.decode(self.encode(silence))
コード例 #18
0
ファイル: acoustic_converter.py プロジェクト: BURI55/yukarin
 def load_wave(self, path: Path):
     """Load the wave at ``path`` using ``self._param.sampling_rate``."""
     rate = self._param.sampling_rate
     loaded = Wave.load(path, sampling_rate=rate)
     return loaded