def synthesisLPC2PSD(source_object, filter_object, seeds_signals):
    """Synthesize a waveform through an LPC round-trip of the spectrogram.

    Every column of ``filter_object['spectrogram']`` is converted to LPC
    coefficients plus a gain with ``lpc.psd2lpc`` (order ``ORDER``) and then
    turned back into a spectrum with ``lpc.lpc2psd``. The rebuilt spectra are
    handed to ``synthesisRequiem.get_waveform`` together with the excitation
    signal. Elapsed times for both stages are printed.

    Args:
        source_object: mapping read for the keys ``'temporal_positions'``,
            ``'f0'``, ``'vuv'`` and ``'aperiodicity'``.
        filter_object: mapping read for ``'spectrogram'`` (a 2-D array that is
            sliced column by column) and ``'fs'``.
        seeds_signals: mapping read for the keys ``'pulse'`` and ``'noise'``.

    Returns:
        The value returned by ``synthesisRequiem.get_waveform``.
    """
    # NOTE(review): presumably the spectrogram rows are one-sided FFT bins,
    # which is why the FFT length is recovered as 2 * (rows - 1) -- confirm.
    fft_size = 2 * (filter_object["spectrogram"].shape[0] - 1)
    print("synthesis lpc 2 psd")

    excitation_signal, _, _ = synthesisRequiem.get_excitation_signal(
        source_object['temporal_positions'],
        filter_object['fs'],
        source_object['f0'],
        source_object['vuv'],
        seeds_signals['pulse'],
        seeds_signals['noise'],
        source_object['aperiodicity'])

    spectrogram = filter_object['spectrogram']

    # Analysis stage: one (coefficients, gain) pair per spectrogram column.
    tic = time.time()
    frame_models = []
    for column in range(spectrogram.shape[1]):
        coefficients, gain = lpc.psd2lpc(spectrogram[:, column], order=ORDER)
        frame_models.append((coefficients, gain))
    print("lpc coef cost {}".format(time.time() - tic))

    # Synthesis stage: the timer covers both the spectrum rebuild and the
    # waveform filtering, matching what "filter cost" reports.
    tic = time.time()
    rebuilt = np.array([
        lpc.lpc2psd(coefficients, gain, fft_size)
        for coefficients, gain in frame_models
    ])
    # Frames were stacked along axis 0; swap axes before passing them on.
    y0 = synthesisRequiem.get_waveform(
        excitation_signal,
        np.transpose(rebuilt, [1, 0]),
        source_object['temporal_positions'],
        source_object['f0'],
        filter_object['fs'])
    print("filter cost {}".format(time.time() - tic))
    return y0
def synthesisPSD(source_object, filter_object, seeds_signals):
    """Synthesize a waveform directly from the stored spectrogram.

    Builds the excitation signal with
    ``synthesisRequiem.get_excitation_signal`` and passes it, together with
    ``filter_object['spectrogram']`` unchanged, to
    ``synthesisRequiem.get_waveform``. The time spent in ``get_waveform`` is
    printed.

    Args:
        source_object: mapping read for the keys ``'temporal_positions'``,
            ``'f0'``, ``'vuv'`` and ``'aperiodicity'``.
        filter_object: mapping read for the keys ``'spectrogram'`` and
            ``'fs'``.
        seeds_signals: mapping read for the keys ``'pulse'`` and ``'noise'``.

    Returns:
        The value returned by ``synthesisRequiem.get_waveform``.
    """
    # The FFT size is not needed here: the spectrogram is used as-is, so the
    # previously computed (and unused) ``fft_size`` local has been dropped.
    print("synthesis psd")

    excitation_signal, _, _ = synthesisRequiem.get_excitation_signal(
        source_object['temporal_positions'],
        filter_object['fs'],
        source_object['f0'],
        source_object['vuv'],
        seeds_signals['pulse'],
        seeds_signals['noise'],
        source_object['aperiodicity'])

    start_time = time.time()
    y = synthesisRequiem.get_waveform(
        excitation_signal,
        filter_object['spectrogram'],
        source_object['temporal_positions'],
        source_object['f0'],
        filter_object['fs'])
    print("filter cost {}".format(time.time() - start_time))
    return y
def synthesisGMF_IAIF(source_object, filter_object, seeds_signals):
    """Synthesize a waveform from a GMF-IAIF decomposition of the spectrogram.

    Every column of ``filter_object['spectrogram']`` is decomposed with
    ``gmf_iaif.gmf_iaif`` (``vt_order=ORDER``) into four values, unpacked here
    as glottal coefficients, glottal gain, vocal-tract coefficients and
    vocal-tract gain. A spectrum is then rebuilt per frame with
    ``lpc.lpc2psd`` from the glottal coefficients and the product of both
    gains, and the result is passed to ``synthesisRequiem.get_waveform`` along
    with the excitation signal. Elapsed times for both stages are printed.

    Args:
        source_object: mapping read for the keys ``'temporal_positions'``,
            ``'f0'``, ``'vuv'`` and ``'aperiodicity'``.
        filter_object: mapping read for ``'spectrogram'`` (a 2-D array that is
            sliced column by column) and ``'fs'``.
        seeds_signals: mapping read for the keys ``'pulse'`` and ``'noise'``.

    Returns:
        The value returned by ``synthesisRequiem.get_waveform``.
    """
    # NOTE(review): presumably the spectrogram rows are one-sided FFT bins,
    # which is why the FFT length is recovered as (rows - 1) * 2 -- confirm.
    fft_size = (filter_object["spectrogram"].shape[0] - 1) * 2
    print("synthesis gmf-iaif")

    excitation_signal, _, _ = synthesisRequiem.get_excitation_signal(
        source_object['temporal_positions'],
        filter_object['fs'],
        source_object['f0'],
        source_object['vuv'],
        seeds_signals['pulse'],
        seeds_signals['noise'],
        source_object['aperiodicity'])

    psd = filter_object['spectrogram']
    n_frames = psd.shape[1]

    # The former per-frame ``lpc.psd2lpc`` call was removed: its results were
    # overwritten by the ``gmf_iaif`` outputs before ever being read, so it
    # only added work to the timed loop.
    start_time = time.time()
    frame_models = [
        gmf_iaif.gmf_iaif(psd[:, i], vt_order=ORDER) for i in range(n_frames)
    ]
    print("gmf-iaif cost :{}, frames: {}".format(
        time.time() - start_time, n_frames))

    # NOTE(review): only the glottal coefficients are used for the rebuilt
    # spectrum (scaled by both gains); the vocal-tract coefficients are
    # ignored, as in the original where that term was disabled -- confirm
    # this is intended.
    start_time = time.time()
    recons_psds = np.array([
        lpc.lpc2psd(glottal_lpc, glottal_g * vt_g, fft_size)
        for glottal_lpc, glottal_g, _vt_lpc, vt_g in frame_models
    ])
    # Frames were stacked along axis 0; swap axes before passing them on.
    y0 = synthesisRequiem.get_waveform(
        excitation_signal,
        np.transpose(recons_psds, [1, 0]),
        source_object['temporal_positions'],
        source_object['f0'],
        filter_object['fs'])
    print("filter cost {}".format(time.time() - start_time))
    return y0
recons_vt_psds = np.array(recons_vt_psds) df = pysptk.synthesis.AllZeroDF(ORDER) synthesizer = pysptk.synthesis.Synthesizer(df, int(fs * 0.005)) x_glottal_res_zerodf = synthesizer.synthesis(x, lpcs / gains) df = pysptk.synthesis.AllZeroDF(3) synthesizer = pysptk.synthesis.Synthesizer(df, int(fs * 0.005)) x_res_zerodf = synthesizer.synthesis(x_glottal_res_zerodf, glottal_lpcs / glottal_gains) wavwrite('x_glottal_res_zerodf.wav', fs, (x_glottal_res_zerodf * 2**15).astype(np.int16)) wavwrite('x_res_zerodf.wav', fs, (x_res_zerodf * 2**15).astype(np.int16)) y = synthesisRequiem.get_waveform(x_res_zerodf, np.transpose(recons_psds, [1, 0]), dat['temporal_positions'], dat['f0'], dat['fs']) y_from_glottal = synthesisRequiem.get_waveform( x_glottal_res_zerodf, np.transpose(recons_vt_psds, [1, 0]), dat['temporal_positions'], dat['f0'], dat['fs']) wavwrite('x_recons_zerodf.wav', fs, (y * 2**15).astype(np.int16)) wavwrite('x_recons_glottal_zerodf.wav', fs, (y_from_glottal * 2**15).astype(np.int16)) x_res = np.zeros([x.shape[0] + 100]) x_glottal_res = np.zeros([x.shape[0] + 100]) temporal_positions = dat['temporal_positions'] f0_sequence = dat['f0']
tmp_complex_cepstrum = np.zeros(fft_size) tmp_complex_cepstrum[latter_index.astype(int) - 1] = tmp_cepstrum[latter_index.astype(int) - 1] * 2 tmp_complex_cepstrum[0] = tmp_cepstrum[0] spectrum = np.exp(np.fft.ifft(tmp_complex_cepstrum)) response = ifft(1 / spectrum * fft(win_x, fft_size)).real inv = response origin = int(temporal_position * fs + 0.501) + 1 safe_index = np.minimum( len(x_res) - 1, np.arange(origin, origin + fft_size)) x_res[safe_index] += inv print("filter cost {}".format(time.time() - start_time)) return x_res x_res = inverse_psd(x, dat) os.makedirs(out_path, exist_ok=True) wavwrite(os.path.join(out_path, 'x_res.wav'), fs, (x_res * 2**15).astype(np.int16)) y = synthesisRequiem.get_waveform(x_res, dat['spectrogram'], dat['temporal_positions'], dat['f0'], dat['fs']) wavwrite(os.path.join(out_path, 'x_recons.wav'), fs, (y * 2**15).astype(np.int16))