Exemple #1
0
    def run_world_synth(synth_output: Dict[str, np.ndarray],
                        hparams: ExtendedHParams,
                        epoch: int = None,
                        step: int = None,
                        use_model_name: bool = True,
                        has_deltas: bool = False) -> None:
        """
        Synthesise every entry of synth_output with the WORLD vocoder.

        Each output is split into WORLD features, the coded spectrum is
        decoded, a waveform is generated and written as a wav file into the
        synthesis directory. When hparams.synth_ext is not "wav"
        (case-insensitive) the wav file is converted with PyDub and the
        intermediate wav file is deleted.

        :param synth_output:    Mapping from id name to the network output.
        :param hparams:         Hyper-parameter container. Reads synth_fs,
                                num_coded_sps, num_bap, sp_type,
                                do_post_filtering, synth_file_suffix and
                                synth_ext. The attributes preemphasis,
                                f0_silence_threshold and lf0_zero are
                                forwarded to the waveform generation only
                                if they exist.
        :param epoch:           Forwarded to Synthesiser._get_synth_dir.
        :param step:            Forwarded to Synthesiser._get_synth_dir.
        :param use_model_name:  Forwarded to Synthesiser._get_synth_dir.
        :param has_deltas:      Passed as contains_deltas when the output is
                                split into WORLD features.
        :return:                None, the audio is written to disk.
        """
        fft_size = pyworld.get_cheaptrick_fft_size(hparams.synth_fs)
        target_dir = Synthesiser._get_synth_dir(
            hparams, use_model_name, epoch=epoch, step=step)

        # Optional hyper-parameters which are only passed on when present.
        optional_attrs = ("preemphasis", "f0_silence_threshold", "lf0_zero")

        for id_name, features in synth_output.items():
            message = "Synthesise {} with the WORLD vocoder.".format(id_name)
            logging.info(message)

            world_feats = WorldFeatLabelGen.convert_to_world_features(
                features,
                contains_deltas=has_deltas,
                num_coded_sps=hparams.num_coded_sps,
                num_bap=hparams.num_bap)
            coded_sp, lf0, vuv, bap = world_feats

            decoded_sp = AudioProcessing.decode_sp(
                coded_sp,
                hparams.sp_type,
                hparams.synth_fs,
                post_filtering=hparams.do_post_filtering)
            # copy=False avoids a copy when the array is already double.
            amp_sp = decoded_sp.astype(np.double, copy=False)

            extra_kwargs = {name: getattr(hparams, name)
                            for name in optional_attrs
                            if hasattr(hparams, name)}
            waveform = WorldFeatLabelGen.world_features_to_raw(
                amp_sp, lf0, vuv, bap,
                fs=hparams.synth_fs,
                n_fft=fft_size,
                **extra_kwargs)

            # The wav file is always written first; other formats are
            # derived from it afterwards.
            name_parts = [os.path.basename(id_name),
                          hparams.synth_file_suffix,
                          '_',
                          str(hparams.num_coded_sps),
                          hparams.sp_type,
                          "_WORLD"]
            file_path = os.path.join(target_dir, "".join(name_parts))
            wav_path = file_path + ".wav"
            soundfile.write(wav_path, waveform, hparams.synth_fs)

            if hparams.synth_ext.lower() == 'wav':
                continue

            # Convert to the requested format with PyDub and drop the wav.
            segment = pydub.AudioSegment.from_wav(wav_path)
            exported = segment.export(file_path + "." + hparams.synth_ext,
                                      format=hparams.synth_ext)
            exported.close()
            os.remove(wav_path)
Exemple #2
0
    def run_world_synth(synth_output, hparams):
        """
        Synthesise every entry of synth_output with the WORLD vocoder.

        Each output is split into WORLD features (no deltas expected), the
        coded spectrum is decoded, a waveform is generated and written as a
        wav file. When hparams.synth_ext is not "wav" (case-insensitive) the
        wav file is converted with PyDub and the intermediate wav file is
        deleted.

        The output directory is hparams.synth_dir, falling back to
        hparams.out_dir and finally to the current directory. The resolved
        directory is created before anything is written.

        :param synth_output:  Mapping from id name to the network output.
        :param hparams:       Hyper-parameter container. Reads synth_fs,
                              synth_dir, out_dir, num_coded_sps, sp_type,
                              do_post_filtering, model_name,
                              synth_file_suffix and synth_ext. The
                              attributes preemphasize, f0_silence_threshold
                              and lf0_zero are forwarded to the waveform
                              generation only if they exist.
        :return:              None, the audio is written to disk.
        """
        fft_size = pyworld.get_cheaptrick_fft_size(hparams.synth_fs)

        if hparams.synth_dir is not None:
            save_dir = hparams.synth_dir
        elif hparams.out_dir is not None:
            save_dir = hparams.out_dir
        else:
            save_dir = os.path.curdir
        # Create the directory that is actually written to. Using
        # hparams.synth_dir here would pass None whenever a fallback
        # directory was selected above.
        makedirs_safe(save_dir)

        # Optional hyper-parameters are the same for every utterance, so
        # collect them once.
        optional_kwargs = dict()
        for attr in "preemphasize", "f0_silence_threshold", "lf0_zero":
            if hasattr(hparams, attr):
                optional_kwargs[attr] = getattr(hparams, attr)

        if hparams.model_name is not None:
            model_tag = "_" + hparams.model_name
        else:
            model_tag = ""

        for id_name, output in synth_output.items():
            logging.info(
                "Synthesise {} with the WORLD vocoder.".format(id_name))

            coded_sp, lf0, vuv, bap = WorldFeatLabelGen.convert_to_world_features(
                output,
                contains_deltas=False,
                num_coded_sps=hparams.num_coded_sps)
            # copy=False avoids a copy when the array is already double.
            amp_sp = WorldFeatLabelGen.decode_sp(
                coded_sp,
                hparams.sp_type,
                hparams.synth_fs,
                post_filtering=hparams.do_post_filtering).astype(np.double,
                                                                 copy=False)
            waveform = WorldFeatLabelGen.world_features_to_raw(
                amp_sp,
                lf0,
                vuv,
                bap,
                fs=hparams.synth_fs,
                n_fft=fft_size,
                **optional_kwargs)

            # Always save as wav file first and convert afterwards if necessary.
            file_path = os.path.join(
                save_dir, "{}{}{}{}".format(os.path.basename(id_name),
                                            model_tag,
                                            hparams.synth_file_suffix,
                                            "_WORLD"))
            wav_path = file_path + ".wav"
            soundfile.write(wav_path, waveform, hparams.synth_fs)

            # Use PyDub for special audio formats.
            if hparams.synth_ext.lower() != 'wav':
                as_wave = pydub.AudioSegment.from_wav(wav_path)
                exported = as_wave.export(file_path + "." + hparams.synth_ext,
                                          format=hparams.synth_ext)
                exported.close()
                os.remove(wav_path)