def synthesis_from_lossless(m_mag, m_real, m_imag, v_shift):
    """Rebuild a signal from lossless magnitude / real / imaginary features.

    ``m_real + 1j * m_imag`` is divided by its own modulus and scaled by
    ``m_mag``. The result is passed through ``la.add_hermitian_half``,
    inverse-FFT'd (real part kept), ``fftshift``-ed along axis 1 and finally
    handed to ``mp.ola`` together with ``la.shift_to_pm(v_shift)``.

    Args:
        m_mag: Magnitude features (assumed 2-D, one frame per row, because the
            frames are shifted along axis 1 -- TODO confirm).
        m_real: Real part of the phase-carrying complex value; same shape
            as ``m_mag``.
        m_imag: Imaginary part of the phase-carrying complex value; same
            shape as ``m_mag``.
        v_shift: Per-frame shifts, converted with ``la.shift_to_pm``.

    Returns:
        Whatever ``mp.ola`` returns for the rebuilt frames (presumably the
        overlap-added waveform).
    """
    m_phase = m_real + m_imag * 1j

    # Protection against divide-by-zero: bins whose modulus is exactly zero
    # are divided by 1 instead.
    m_phase_norm = np.absolute(m_phase)
    m_phase_norm[m_phase_norm == 0.0] = 1.0

    m_half_spec = m_mag * m_phase / m_phase_norm
    m_full_spec = la.add_hermitian_half(m_half_spec, data_type='complex')

    m_frames = np.fft.fftshift(np.fft.ifft(m_full_spec).real, axes=1)
    return mp.ola(m_frames, la.shift_to_pm(v_shift))
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    """Interpolate from utterance A to utterance B (time-domain draft).

    NOTE: another function with the same name is defined later in this file
    and replaces this one at import time. The original author marked this
    version as "NOT FINISHED".

    Formants and short frames are taken once from two anchor frames (frame
    ``nx_strt_a`` of A and frame ``nx_strt_b + nframes - 1`` of B). For each
    of the ``nframes`` interpolated frames the true envelopes of the current
    A and B frames are warped towards each other, mixed into a target
    envelope, and the anchor short frames are FFT-filtered by the difference
    between the target and their envelope before being linearly mixed.

    Args:
        wavfile_a: Path of the first wav file (read by ``mp.analysis_lossless``
            and ``la.read_audio_file``).
        wavfile_b: Path of the second wav file.
        nx_strt_a: Index of the first interpolated frame in A; frames before
            it are copied unchanged from A.
        nx_strt_b: Index of the first interpolated frame in B; frames from
            ``nx_strt_b + nframes`` onwards are copied unchanged from B.
        nframes: Number of interpolated frames.
        fft_len: FFT length; also the length of every synthesised frame.

    Returns:
        Tuple ``(v_sig_merged, fs)``: the output of ``synthesis_from_lossless``
        and the sample rate reported by ``la.read_audio_file(wavfile_b)``.
    """
    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, _, _, v_shift_a = mp.analysis_lossless(wavfile_a)
    m_mag_b, m_real_b, m_imag_b, _, _, v_shift_b = mp.analysis_lossless(wavfile_b)
    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Anchor frames. The B anchor is the last interpolated frame of B, i.e.
    # the frame reached when sp_weight == 1. It was previously indexed with
    # nx_strt_a, an offset that belongs to the other utterance.
    (_, _, v_frmnts_bins_a, v_frmnts_gains_db_a, _, v_frm_short_a,
     shift_a) = get_formant_locations_from_raw_long_frame(
         v_sig_a, v_pm_a, nx_strt_a, fft_len)
    (_, _, v_frmnts_bins_b, v_frmnts_gains_db_b, _, v_frm_short_b,
     shift_b) = get_formant_locations_from_raw_long_frame(
         v_sig_b, v_pm_b, nx_strt_b + nframes - 1, fft_len)

    # Formant mapping:
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
        v_frmnts_bins_a, v_frmnts_gains_db_a,
        v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

    v_shifts_syn = np.zeros(nframes, dtype='int')
    m_frms_syn = np.zeros((nframes, fft_len))
    for nx_frm in range(nframes):
        # Weight runs from 0 (pure A) to 1 (pure B). A single-frame
        # interpolation would divide by zero, so it is pinned to A.
        sp_weight = nx_frm / (nframes - 1.0) if nframes > 1 else 0.0
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Magnitude spectrum (dB) of the current frames.
        # TODO: NOT FINISHED -- only the spectra follow the current frame; the
        # filtered short frames and shifts still come from the anchors.
        v_mag_db_a = get_formant_locations_from_raw_long_frame(
            v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_b = get_formant_locations_from_raw_long_frame(
            v_sig_b, v_pm_b, nx_b, fft_len)[0]

        # True envelopes and warping:
        v_true_env_db_a = la.true_envelope(
            v_mag_db_a[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(
            v_mag_db_b[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]

        v_sp_env_db_a_warp = warp_mag_spec(
            v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt,
            fft_len, sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(
            v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt,
            fft_len, (1 - sp_weight))

        # Spectral envelope mix:
        v_sp_env_db_targ = (v_sp_env_db_a_warp * (1.0 - sp_weight)
                            + v_sp_env_db_b_warp * sp_weight)

        # Source mix: gain (dB) that moves each envelope onto the target.
        v_spec_diff_db_a = v_sp_env_db_targ - v_true_env_db_a
        v_spec_diff_db_b = v_sp_env_db_targ - v_true_env_db_b

        # Filtering (FFT filter):
        v_frm_short_a_ext_filt = fft_filter(
            v_frm_short_a, shift_a, v_spec_diff_db_a, fft_len)
        v_frm_short_b_ext_filt = fft_filter(
            v_frm_short_b, shift_b, v_spec_diff_db_b, fft_len)

        # Mix signal and shifts, then store:
        m_frms_syn[nx_frm, :] = (v_frm_short_a_ext_filt * (1.0 - sp_weight)
                                 + v_frm_short_b_ext_filt * sp_weight)
        v_shifts_syn[nx_frm] = lu.round_to_int(
            shift_a * (1.0 - sp_weight) + shift_b * sp_weight)

    # Convert the synthesised frames to lossless features:
    m_frms_syn_dc = np.fft.fftshift(m_frms_syn, axes=1)
    m_fft_syn = la.remove_hermitian_half(np.fft.fft(m_frms_syn_dc))
    m_mag_syn, m_real_syn, m_imag_syn = compute_lossless_spec_feats(m_fft_syn)

    # Merge: head of A + interpolated frames + tail of B.
    nx_end_b = nx_strt_b + nframes
    m_mag_merged = np.vstack(
        (m_mag_a[:nx_strt_a, :], m_mag_syn, m_mag_b[nx_end_b:, :]))
    m_real_merged = np.vstack(
        (m_real_a[:nx_strt_a, :], m_real_syn, m_real_b[nx_end_b:, :]))
    m_imag_merged = np.vstack(
        (m_imag_a[:nx_strt_a, :], m_imag_syn, m_imag_b[nx_end_b:, :]))
    v_shift_merged = np.r_[
        v_shift_a[:nx_strt_a], v_shifts_syn, v_shift_b[nx_end_b:]]

    v_sig_merged = synthesis_from_lossless(
        m_mag_merged, m_real_merged, m_imag_merged, v_shift_merged)

    return v_sig_merged, fs
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    """Interpolate from utterance A to utterance B using fixed anchor envelopes.

    The formants and true envelopes of two anchor frames (frame ``nx_strt_a``
    of A and frame ``nx_strt_b + nframes - 1`` of B) are computed once. For
    each interpolated frame the anchor envelopes are warped towards each other
    and mixed into a target envelope. The current A and B magnitudes are
    divided by their own true envelope, multiplied by the target envelope and
    mixed linearly; the real/imag features and the anchor shifts are mixed
    with the same weight.

    Args:
        wavfile_a: Path of the first wav file (read by ``mp.analysis_lossless``
            and ``la.read_audio_file``).
        wavfile_b: Path of the second wav file.
        nx_strt_a: Index of the first interpolated frame in A; frames before
            it are copied unchanged from A.
        nx_strt_b: Index of the first interpolated frame in B; frames from
            ``nx_strt_b + nframes`` onwards are copied unchanged from B.
        nframes: Number of interpolated frames.
        fft_len: FFT length; interpolated feature rows hold
            ``1 + fft_len // 2`` values.

    Returns:
        Tuple ``(v_sig_merged, fs)``: the output of ``synthesis_from_lossless``
        and the sample rate reported by ``la.read_audio_file(wavfile_b)``.
    """
    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, _, _, v_shift_a = mp.analysis_lossless(wavfile_a)
    m_mag_b, m_real_b, m_imag_b, _, _, v_shift_b = mp.analysis_lossless(wavfile_b)
    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Anchor frames: first interpolated frame of A, last interpolated frame of B.
    (v_mag_db_a, _, v_frmnts_bins_a, v_frmnts_gains_db_a, _, _,
     shift_a) = get_formant_locations_from_raw_long_frame(
         v_sig_a, v_pm_a, nx_strt_a, fft_len)
    (v_mag_db_b, _, v_frmnts_bins_b, v_frmnts_gains_db_b, _, _,
     shift_b) = get_formant_locations_from_raw_long_frame(
         v_sig_b, v_pm_b, nx_strt_b + nframes - 1, fft_len)

    # Formant mapping:
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
        v_frmnts_bins_a, v_frmnts_gains_db_a,
        v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

    # Spectral envelope anchors:
    v_true_env_db_a = la.true_envelope(
        v_mag_db_a[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]
    v_true_env_db_b = la.true_envelope(
        v_mag_db_b[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]

    # Floor division keeps the bin count an int under both Python 2 and 3.
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    for nx_frm in range(nframes):
        # Weight runs from 0 (pure A) to 1 (pure B). A single-frame
        # interpolation would divide by zero, so it is pinned to A.
        sp_weight = nx_frm / (nframes - 1.0) if nframes > 1 else 0.0
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Spectral warping of the anchor envelopes:
        v_sp_env_db_curr_a_warp = warp_mag_spec(
            v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt,
            fft_len, sp_weight)
        v_sp_env_db_curr_b_warp = warp_mag_spec(
            v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt,
            fft_len, (1 - sp_weight))

        # Spectral envelope mix:
        v_sp_env_db_curr_targ = (v_sp_env_db_curr_a_warp * (1.0 - sp_weight)
                                 + v_sp_env_db_curr_b_warp * sp_weight)

        # Whitening: divide each current magnitude by its own true envelope.
        v_mag_db_curr_a = get_formant_locations_from_raw_long_frame(
            v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_curr_b = get_formant_locations_from_raw_long_frame(
            v_sig_b, v_pm_b, nx_b, fft_len)[0]

        v_true_env_db_curr_a = la.true_envelope(
            v_mag_db_curr_a[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_curr_b = la.true_envelope(
            v_mag_db_curr_b[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]

        v_mag_white_a = m_mag_a[nx_a, :] / la.db(v_true_env_db_curr_a, b_inv=True)
        v_mag_white_b = m_mag_b[nx_b, :] / la.db(v_true_env_db_curr_b, b_inv=True)

        # Impose the target spectral envelope:
        v_sp_env_targ = la.db(v_sp_env_db_curr_targ, b_inv=True)
        v_mag_filt_a = v_mag_white_a * v_sp_env_targ
        v_mag_filt_b = v_mag_white_b * v_sp_env_targ

        # Mix sources and shifts, then store:
        m_mag_interp[nx_frm, :] = (v_mag_filt_a * (1.0 - sp_weight)
                                   + v_mag_filt_b * sp_weight)
        m_real_interp[nx_frm, :] = (m_real_a[nx_a, :] * (1.0 - sp_weight)
                                    + m_real_b[nx_b, :] * sp_weight)
        m_imag_interp[nx_frm, :] = (m_imag_a[nx_a, :] * (1.0 - sp_weight)
                                    + m_imag_b[nx_b, :] * sp_weight)
        v_shifts_interp[nx_frm] = lu.round_to_int(
            shift_a * (1.0 - sp_weight) + shift_b * sp_weight)

    # Merge: head of A + interpolated frames + tail of B.
    nx_end_b = nx_strt_b + nframes
    m_mag_merged = np.vstack(
        (m_mag_a[:nx_strt_a, :], m_mag_interp, m_mag_b[nx_end_b:, :]))
    m_real_merged = np.vstack(
        (m_real_a[:nx_strt_a, :], m_real_interp, m_real_b[nx_end_b:, :]))
    m_imag_merged = np.vstack(
        (m_imag_a[:nx_strt_a, :], m_imag_interp, m_imag_b[nx_end_b:, :]))
    v_shift_merged = np.r_[
        v_shift_a[:nx_strt_a], v_shifts_interp, v_shift_b[nx_end_b:]]

    v_sig_merged = synthesis_from_lossless(
        m_mag_merged, m_real_merged, m_imag_merged, v_shift_merged)

    return v_sig_merged, fs
def speech_interp(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    """Interpolate from utterance A to utterance B frame by frame.

    For every interpolated frame, the formants of the current A and B frames
    are located and mapped onto each other. Their true envelopes are then
    warped towards each other and mixed into a target envelope. Each
    magnitude is scaled by the gain between its own envelope and the target,
    and the two are mixed linearly. The real/imag features and the shifts are
    mixed with the same weight.

    Args:
        wavfile_a: Path of the first wav file (read by ``mp.analysis_lossless``
            and ``la.read_audio_file``).
        wavfile_b: Path of the second wav file.
        nx_strt_a: Index of the first interpolated frame in A; frames before
            it are copied unchanged from A.
        nx_strt_b: Index of the first interpolated frame in B; frames from
            ``nx_strt_b + nframes`` onwards are copied unchanged from B.
        nframes: Number of interpolated frames.
        fft_len: FFT length; interpolated feature rows hold
            ``1 + fft_len // 2`` values.

    Returns:
        Tuple ``(v_sig_merged, fs)``: the output of ``synthesis_from_lossless``
        and the sample rate reported by ``la.read_audio_file(wavfile_b)``.
    """
    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, _, _, v_shift_a = mp.analysis_lossless(wavfile_a)
    m_mag_b, m_real_b, m_imag_b, _, _, v_shift_b = mp.analysis_lossless(wavfile_b)
    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Floor division keeps the bin count an int under both Python 2 and 3.
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    for nx_frm in range(nframes):
        # Weight runs from 0 (pure A) to 1 (pure B). A single-frame
        # interpolation would divide by zero, so it is pinned to A.
        sp_weight = nx_frm / (nframes - 1.0) if nframes > 1 else 0.0
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Get formants of the current frames:
        (v_mag_db_a, _, v_frmnts_bins_a, v_frmnts_gains_db_a, _, _,
         shift_a) = get_formant_locations_from_raw_long_frame(
             v_sig_a, v_pm_a, nx_a, fft_len)
        (v_mag_db_b, _, v_frmnts_bins_b, v_frmnts_gains_db_b, _, _,
         shift_b) = get_formant_locations_from_raw_long_frame(
             v_sig_b, v_pm_b, nx_b, fft_len)

        # Formant mapping:
        v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
            v_frmnts_bins_a, v_frmnts_gains_db_a,
            v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

        # True envelopes and warping:
        v_true_env_db_a = la.true_envelope(
            v_mag_db_a[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(
            v_mag_db_b[None, :], in_type='db', ncoeffs=400, thres_db=0.1)[0]

        v_sp_env_db_a_warp = warp_mag_spec(
            v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt,
            fft_len, sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(
            v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt,
            fft_len, (1 - sp_weight))

        # Spectral envelope mix:
        v_sp_env_db_targ = (v_sp_env_db_a_warp * (1.0 - sp_weight)
                            + v_sp_env_db_b_warp * sp_weight)

        # Impose the target envelope: scale each magnitude by the gain
        # between its own envelope and the target one.
        v_mag_filt_a = m_mag_a[nx_a, :] * la.db(
            v_sp_env_db_targ - v_true_env_db_a, b_inv=True)
        v_mag_filt_b = m_mag_b[nx_b, :] * la.db(
            v_sp_env_db_targ - v_true_env_db_b, b_inv=True)

        # Mix sources and shifts, then store:
        m_mag_interp[nx_frm, :] = (v_mag_filt_a * (1.0 - sp_weight)
                                   + v_mag_filt_b * sp_weight)
        m_real_interp[nx_frm, :] = (m_real_a[nx_a, :] * (1.0 - sp_weight)
                                    + m_real_b[nx_b, :] * sp_weight)
        m_imag_interp[nx_frm, :] = (m_imag_a[nx_a, :] * (1.0 - sp_weight)
                                    + m_imag_b[nx_b, :] * sp_weight)
        v_shifts_interp[nx_frm] = lu.round_to_int(
            shift_a * (1.0 - sp_weight) + shift_b * sp_weight)

    # Merge: head of A + interpolated frames + tail of B.
    nx_end_b = nx_strt_b + nframes
    m_mag_merged = np.vstack(
        (m_mag_a[:nx_strt_a, :], m_mag_interp, m_mag_b[nx_end_b:, :]))
    m_real_merged = np.vstack(
        (m_real_a[:nx_strt_a, :], m_real_interp, m_real_b[nx_end_b:, :]))
    m_imag_merged = np.vstack(
        (m_imag_a[:nx_strt_a, :], m_imag_interp, m_imag_b[nx_end_b:, :]))
    v_shift_merged = np.r_[
        v_shift_a[:nx_strt_a], v_shifts_interp, v_shift_b[nx_end_b:]]

    v_sig_merged = synthesis_from_lossless(
        m_mag_merged, m_real_merged, m_imag_merged, v_shift_merged)

    return v_sig_merged, fs