Ejemplo n.º 1
0
def world_synthesis(wav_name, feat_param, f0, ap, spectral, spectral_type):
    """Synthesise a waveform with WORLD and write it as a 16-bit wav file.

    Args:
        wav_name (str): filename of synthesised wav
        feat_param (dict): acoustic feature parameter dictionary; the keys
            'fs', 'fftl' and 'shiftms' are read, plus 'mcep_alpha' when
            spectral_type is 'mcc'
        f0 (np array): pitch features
        ap: aperiodicity features
        spectral: spectral features
        spectral_type (str): spectral feature type, 'mcc' or 'sp'

    Raises:
        ValueError: if spectral_type is neither 'mcc' nor 'sp'.
    """
    # Reject an unknown feature type up front, before a synthesizer is built,
    # and report the values that are really accepted.
    if spectral_type not in ('mcc', 'sp'):
        message = ("Unsupported spectral_type %r: currently support 'mcc' "
                   "or 'sp' only." % (spectral_type,))
        logging.error(message)
        raise ValueError(message)

    synthesizer = Synthesizer(fs=feat_param['fs'],
                              fftl=feat_param['fftl'],
                              shiftms=feat_param['shiftms'])

    if spectral_type == 'mcc':
        wav = synthesizer.synthesis(f0,
                                    spectral,
                                    ap,
                                    alpha=feat_param['mcep_alpha'])
    else:
        wav = synthesizer.synthesis_spc(f0, spectral, ap)

    # Saturate to the int16 range so the cast below cannot wrap around.
    wav = np.clip(wav, -32768, 32767)
    wavfile.write(wav_name, feat_param['fs'], wav.astype(np.int16))
    logging.info("wrote %s.", wav_name)
Ejemplo n.º 2
0
class ConverterWorker:
    '''
    Worker that turns acoustic features into converted speech.

    Pipeline: features -> converted features -> post-filtered features
    -> converted waveform.
    '''
    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        '''
        Build the conversion models and keep the queues and configs.

        feature_queue is the queue `start` reads (f0, spc, ap, mcep) tuples
        from; converted_queue is the queue `start` puts waveforms on.
        '''

        # GMM for the mel-cepstrum; its parameters are loaded from the config.
        self._mcep_gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                                      covtype=mcep_gmm_config.covtype,
                                      gmmmode=None)
        self._mcep_gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm_config = mcep_gmm_config

        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        self._synthesizer = Synthesizer(fs=synthesizer_config.fs,
                                        fftl=synthesizer_config.fftl,
                                        shiftms=synthesizer_config.shiftms)
        self._synthesizer_config = synthesizer_config

    def convert_from_feature(self, f0, spc, ap, mcep) -> numpy.ndarray:
        '''
        Convert one set of source features into a 16-bit waveform.

        f0 is converted with the source/target F0 statistics, mcep with the
        GMM followed by the GV post-filter, and ap is passed to the
        synthesizer unchanged. spc is accepted but not used here.

        Returns the synthesised waveform clipped to the int16 range and cast
        to int16.
        '''
        cv_f0 = self._f0_stats.convert(f0, self._f0_stats_config.source_stats,
                                       self._f0_stats_config.target_stats)

        # Only columns 1.. go through the GMM; column 0 of the source mcep is
        # re-attached untouched (presumably the power term -- confirm).
        cv_mcep_wopow = self._mcep_gmm.convert(
            static_delta(mcep[:, 1:]), cvtype=self._mcep_gmm_config.cvtype)
        cv_mcep = numpy.c_[mcep[:, 0], cv_mcep_wopow]

        cv_mcep_wGV = self._mcep_gv.postfilter(
            cv_mcep,
            self._mcep_gv_config.target_stats,
            cvgvstats=self._mcep_gv_config.cvgv_stats,
            alpha=self._mcep_gv_config.morph_coeff,
            startdim=1)

        output_wav = self._synthesizer.synthesis(
            cv_f0,
            cv_mcep_wGV,
            ap,
            rmcep=mcep,
            alpha=self._synthesizer_config.mcep_alpha)
        # Use the public numpy.int16 alias; numpy.core is a private namespace.
        return output_wav.clip(-32768, 32767).astype(numpy.int16)

    def start(self):
        '''
        Loop forever: take features from the feature queue, convert them and
        put the resulting waveform on the converted queue.
        '''
        while True:
            feature = self._feature_queue.get()  # synchronous processing
            f0, spc, ap, mcep = feature
            output_wav = self.convert_from_feature(f0, spc, ap, mcep)
            self._converted_queue.put(output_wav)
Ejemplo n.º 3
0
def world_speech_synthesis(queue, wav_list, args):
    """WORLD SPEECH SYNTHESIS

    Re-synthesise a waveform from the stored features of every file in
    ``wav_list`` and write it as a 16-bit wav file. Existing outputs are
    skipped unless ``args.overwrite`` is set. When the whole list has been
    processed, the string 'Finish' is put on ``queue``.

    The process exits with status 1 if a feature file has no "/world" entry.

    Parameters
    ----------
    queue : multiprocessing.Queue()
        the queue that receives 'Finish' once all files are processed
    wav_list : list
        list of the wav files
    args :
        feature extract arguments; the attributes read here are fs, fftl,
        shiftms, feature_dir, feature_format, overwrite, f0_dim_idx,
        ap_dim_idx, mcep_dim_start, mcep_dim_end and mcep_alpha
    """
    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, fftl=args.fftl, shiftms=args.shiftms)
    n_wavs = len(wav_list)
    # synthesis
    for i, wav_name in enumerate(wav_list):
        if args.feature_dir is None:
            # Derive both paths from the wav path by substituting "wav";
            # the second replace restores the ".wav" extension that the
            # first one rewrote.
            restored_suffix = args.feature_format + "_restored"
            restored_name = wav_name.replace("wav", restored_suffix)
            restored_name = restored_name.replace(
                ".%s" % args.feature_format + "_restored", ".wav")
            feat_name = wav_name.replace("wav", args.feature_format)
        else:
            restored_name = rootdir_replace(wav_name,
                                            newdir=args.feature_dir +
                                            "restored")
            feat_name = rootdir_replace(wav_name,
                                        extname=args.feature_format,
                                        newdir=args.feature_dir)
        if os.path.exists(restored_name):
            if args.overwrite:
                logging.info("overwrite %s (%d/%d)",
                             restored_name, i + 1, n_wavs)
            else:
                logging.info("skip %s (%d/%d)",
                             restored_name, i + 1, n_wavs)
                continue
        else:
            logging.info("now processing %s (%d/%d)",
                         restored_name, i + 1, n_wavs)
        # load acoustic features
        if check_hdf5(feat_name, "/world"):
            h = read_hdf5(feat_name, "/world")
        else:
            logging.error("%s is not existed.", feat_name)
            sys.exit(1)
        if check_hdf5(feat_name, "/f0"):
            f0 = read_hdf5(feat_name, "/f0")
        else:
            # No stored f0: take it from the feature matrix and zero every
            # frame whose column-0 flag is 0.
            uv = h[:, 0].copy(order='C')
            f0 = h[:, args.f0_dim_idx].copy(order='C')  # cont_f0_lpf
            f0[np.where(uv == 0.0)] = 0.0
        if check_hdf5(feat_name, "/ap"):
            ap = read_hdf5(feat_name, "/ap")
        else:
            # No stored ap: decode it from the trailing columns.
            codeap = h[:, args.ap_dim_idx:].copy(order='C')
            ap = pyworld.decode_aperiodicity(codeap, args.fs, args.fftl)
        mcep = h[:, args.mcep_dim_start:args.mcep_dim_end].copy(order='C')
        # waveform synthesis
        wav = synthesizer.synthesis(f0, mcep, ap, alpha=args.mcep_alpha)
        # Saturate to the int16 range so the cast below cannot wrap around.
        wav = np.clip(wav, -32768, 32767)
        wavfile.write(restored_name, args.fs, wav.astype(np.int16))
    queue.put('Finish')