def world_synthesis(wav_name, feat_param, f0, ap, spectral, spectral_type):
    """Synthesise a waveform with WORLD and write it to a wav file.

    Args:
        wav_name (str): filename of the synthesised wav
        feat_param (dict): acoustic feature parameter dictionary; the keys
            'fs', 'fftl' and 'shiftms' are read, plus 'mcep_alpha' when
            ``spectral_type`` is 'mcc'
        f0 (np array): pitch features
        ap: aperiodicity features
        spectral: spectral features
        spectral_type (str): spectral feature type, either 'mcc' or 'sp'

    Raises:
        ValueError: if ``spectral_type`` is neither 'mcc' nor 'sp'
    """
    # Validate first so that a bad type fails fast, before any synthesizer
    # is built, and with a message that names the values actually accepted.
    if spectral_type not in ('mcc', 'sp'):
        raise ValueError(
            "Unsupported spectral_type %r: only 'mcc' or 'sp' is supported."
            % (spectral_type,))

    synthesizer = Synthesizer(fs=feat_param['fs'],
                              fftl=feat_param['fftl'],
                              shiftms=feat_param['shiftms'])
    if spectral_type == 'mcc':
        wav = synthesizer.synthesis(f0, spectral, ap,
                                    alpha=feat_param['mcep_alpha'])
    else:
        wav = synthesizer.synthesis_spc(f0, spectral, ap)

    # Saturate to the 16-bit signed range so the int16 cast cannot wrap.
    wav = np.clip(wav, -32768, 32767)
    wavfile.write(wav_name, feat_param['fs'], wav.astype(np.int16))
    logging.info("wrote %s.", wav_name)
class ConverterWorker:
    '''
    Features -> converted features -> post-filtered features -> converted speech.

    Takes feature tuples from an input queue, runs the conversion pipeline on
    each of them and puts the resulting waveform on an output queue.
    '''

    def __init__(self,
                 feature_queue: Queue,
                 converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        '''
        Build the conversion components from their configurations.

        feature_queue is the source of (f0, spc, ap, mcep) tuples and
        converted_queue receives the converted waveforms.
        '''
        # GMM used to convert the mel-cepstrum; parameters come from the config.
        self._mcep_gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                                      covtype=mcep_gmm_config.covtype,
                                      gmmmode=None)
        self._mcep_gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm_config = mcep_gmm_config

        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        self._synthesizer = Synthesizer(fs=synthesizer_config.fs,
                                        fftl=synthesizer_config.fftl,
                                        shiftms=synthesizer_config.shiftms)
        self._synthesizer_config = synthesizer_config

    def convert_from_feature(self, f0, spc, ap, mcep) -> numpy.ndarray:
        '''
        Convert one set of acoustic features into a 16-bit waveform.

        f0 is converted with the configured source/target statistics, the
        mel-cepstrum is converted by the GMM and post-filtered with GV, and
        the result is synthesised together with the unmodified ap.
        spc is accepted for interface compatibility but is not used here.

        Returns the synthesised waveform clipped to the int16 range and cast
        to int16.
        '''
        cv_f0 = self._f0_stats.convert(f0,
                                       self._f0_stats_config.source_stats,
                                       self._f0_stats_config.target_stats)

        # Column 0 is excluded from GMM conversion and re-attached unchanged
        # below (presumably the power coefficient -- confirm with the feature
        # extractor).
        cv_mcep_wopow = self._mcep_gmm.convert(
            static_delta(mcep[:, 1:]),
            cvtype=self._mcep_gmm_config.cvtype)
        cv_mcep = numpy.c_[mcep[:, 0], cv_mcep_wopow]

        # GV post-filter, applied from dimension 1 onwards (startdim=1).
        cv_mcep_wGV = self._mcep_gv.postfilter(
            cv_mcep,
            self._mcep_gv_config.target_stats,
            cvgvstats=self._mcep_gv_config.cvgv_stats,
            alpha=self._mcep_gv_config.morph_coeff,
            startdim=1)

        output_wav = self._synthesizer.synthesis(
            cv_f0,
            cv_mcep_wGV,
            ap,
            rmcep=mcep,
            alpha=self._synthesizer_config.mcep_alpha)

        # Use the public numpy.int16; the numpy.core namespace is private and
        # deprecated since NumPy 2.0.
        return output_wav.clip(-32768, 32767).astype(numpy.int16)

    def start(self):
        '''
        Run the worker loop forever: get features, convert, put the waveform.
        '''
        while True:
            # Synchronous processing: get() is called without a timeout.
            feature = self._feature_queue.get()
            f0, spc, ap, mcep = feature
            output_wav = self.convert_from_feature(f0, spc, ap, mcep)
            self._converted_queue.put(output_wav)
def world_speech_synthesis(queue, wav_list, args):
    """Restore waveforms from stored WORLD features and write them as wav files.

    For every entry of ``wav_list`` the matching feature file is read, a
    waveform is synthesised from f0, mcep and aperiodicity, and the result is
    written to the "restored" path derived from the wav name. If the feature
    file has no "/world" dataset, an error is logged and the process exits
    with status 1.

    Parameters
    ----------
    queue : multiprocessing.Queue()
        receives the string 'Finish' once all files have been handled
    wav_list : list
        list of the wav files
    args :
        feature extract arguments; the attributes read here are fs, fftl,
        shiftms, feature_dir, feature_format, overwrite, f0_dim_idx,
        ap_dim_idx, mcep_dim_start, mcep_dim_end and mcep_alpha
    """
    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs,
                              fftl=args.fftl,
                              shiftms=args.shiftms)
    n_files = len(wav_list)

    # synthesis
    for i, wav_name in enumerate(wav_list):
        if args.feature_dir is None:
            # Every "wav" in the path is swapped for "<format>_restored";
            # the file extension is then turned back into ".wav".
            restored_name = wav_name.replace(
                "wav", args.feature_format + "_restored")
            restored_name = restored_name.replace(
                ".%s" % args.feature_format + "_restored", ".wav")
            feat_name = wav_name.replace("wav", args.feature_format)
        else:
            restored_name = rootdir_replace(
                wav_name, newdir=args.feature_dir + "restored")
            feat_name = rootdir_replace(
                wav_name, extname=args.feature_format,
                newdir=args.feature_dir)

        if os.path.exists(restored_name):
            if args.overwrite:
                logging.info("overwrite %s (%d/%d)",
                             restored_name, i + 1, n_files)
            else:
                logging.info("skip %s (%d/%d)",
                             restored_name, i + 1, n_files)
                continue
        else:
            logging.info("now processing %s (%d/%d)",
                         restored_name, i + 1, n_files)

        # load acoustic features
        if check_hdf5(feat_name, "/world"):
            h = read_hdf5(feat_name, "/world")
        else:
            logging.error("%s is not existed.", feat_name)
            sys.exit(1)

        if check_hdf5(feat_name, "/f0"):
            f0 = read_hdf5(feat_name, "/f0")
        else:
            # No stored f0: rebuild it from the feature matrix. Column 0 is
            # used as the voiced/unvoiced flag and column f0_dim_idx as the
            # F0 track (original note: cont_f0_lpf).
            uv = h[:, 0].copy(order='C')
            f0 = h[:, args.f0_dim_idx].copy(order='C')
            # Frames flagged as unvoiced get an F0 of zero.
            f0[uv == 0.0] = 0.0

        if check_hdf5(feat_name, "/ap"):
            ap = read_hdf5(feat_name, "/ap")
        else:
            # No stored ap: decode it from the coded columns of the matrix.
            codeap = h[:, args.ap_dim_idx:].copy(order='C')
            ap = pyworld.decode_aperiodicity(codeap, args.fs, args.fftl)

        mcep = h[:, args.mcep_dim_start:args.mcep_dim_end].copy(order='C')

        # waveform synthesis
        wav = synthesizer.synthesis(f0, mcep, ap, alpha=args.mcep_alpha)
        # Saturate to the 16-bit signed range so the int16 cast cannot wrap.
        wav = np.clip(wav, -32768, 32767)
        wavfile.write(restored_name, args.fs, wav.astype(np.int16))

    queue.put('Finish')