def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Crossfades two spectra along the frequency axis: m_sp_l is kept below
    the transition band, m_sp_r above it, and inside the band both are
    blended with the two halves of a Hanning window.

    m_sp_l, m_sp_r: 2D arrays with the frequency bins along axis 1. The
                    number of bins (shape[1]) is taken as nFFT/2 + 1.
    cut_off: Centre of the transition band. It is converted with
             hz_to_bin(...), so presumably given in Hz (TODO confirm).
    bw: Width of the transition band, same unit as cut_off.
    fs: Sampling rate, forwarded to hz_to_bin(...).
    freq_scale: Currently unused. Kept so existing calls keep working.

    Returns the crossfaded spectrum (weighted sum of both inputs).
    '''
    # Hz to bin (true division, so an odd integer bw is not truncated):
    nFFThalf = m_sp_l.shape[1]
    nFFT = (nFFThalf - 1) * 2
    half_bw = bw / 2.0
    bin_l = lu.round_to_int(hz_to_bin(cut_off - half_bw, nFFT, fs))
    bin_r = lu.round_to_int(hz_to_bin(cut_off + half_bw, nFFT, fs))

    # Fade-out half (1 -> 0) of a Hanning window spanning the transition:
    bw_bin = bin_r - bin_l
    v_fade_out = np.hanning(2 * bw_bin + 1)[bw_bin:]

    # Full-length weights. The right-hand weight is the complement of the
    # left-hand one, so they add up to one at every bin. Building it from the
    # other Hanning half gives the same values, except when the transition
    # collapses to a single bin (bw_bin == 0): there np.hanning(1) is [1.]
    # and both inputs would be added at full level at the crossover bin.
    v_win_l = np.hstack((np.ones(bin_l), v_fade_out,
                         np.zeros(nFFThalf - bin_r - 1)))
    v_win_r = 1.0 - v_win_l

    # Apply windows:
    return m_sp_l * v_win_l[None, :] + m_sp_r * v_win_r[None, :]
def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Joins two spectra with a smooth crossover: bins below the transition
    band come from m_sp_l, bins above it from m_sp_r, and the band itself
    is a Hanning-shaped blend of both.

    m_sp_l, m_sp_r: 2D arrays, frequency bins along axis 1 (shape[1] is
                    treated as nFFT/2 + 1).
    cut_off: Centre of the transition band, as expected by hz_to_bin(...)
             (presumably Hz - TODO confirm).
    bw: Total width of the transition band, same unit as cut_off.
    fs: Sampling rate handed to hz_to_bin(...).
    freq_scale: Not used by this implementation.

    Returns the blended spectrum.
    '''
    # Debug (input spectra of one frame):
    #   holdon(); nx = 120
    #   plot(m_sp_l[nx,:], '-b'); plot(m_sp_r[nx,:], '-r'); holdoff()

    # Hz to bin (float division so integer bw values are not truncated):
    nFFThalf = m_sp_l.shape[1]
    nFFT = (nFFThalf - 1) * 2
    bin_l = lu.round_to_int(hz_to_bin(cut_off - 0.5 * bw, nFFT, fs))
    bin_r = lu.round_to_int(hz_to_bin(cut_off + 0.5 * bw, nFFT, fs))

    # Gen short window: decaying half of a Hanning window (1 -> 0).
    bw_bin = bin_r - bin_l
    v_win_shrt_l = np.hanning(2 * bw_bin + 1)[bw_bin:]

    # Gen long windows. v_win_r is defined as the complement of v_win_l so
    # that the weights sum to one everywhere, including the degenerate case
    # bw_bin == 0 where the two Hanning halves would both be [1.] and the
    # crossover bin would get the sum of both inputs.
    v_win_l = np.hstack((np.ones(bin_l), v_win_shrt_l,
                         np.zeros(nFFThalf - bin_r - 1)))
    v_win_r = 1.0 - v_win_l

    # Apply windows:
    m_sp_l_win = m_sp_l * v_win_l[None, :]
    m_sp_r_win = m_sp_r * v_win_r[None, :]
    m_sp = m_sp_l_win + m_sp_r_win

    # Debug (inputs vs. windowed inputs):
    #   holdon(); nx = 220
    #   plot(m_sp_l[nx,:], '-.b'); plot(m_sp_l_win[nx,:], '-.r')
    #   plot(m_sp_r[nx,:], '-g'); plot(m_sp_r_win[nx,:], '-k'); holdoff()
    # Debug (input vs. result, in dB):
    #   holdon(); nx = 196
    #   plot(db(m_sp_l[nx,:]), '.-b'); plot(db(m_sp[nx,:]), '-k'); holdoff()

    return m_sp
def unwarp_from_fbank(m_mag_mel, v_bins_warp, interp_kind='quadratic'):
    '''
    Interpolates filter-bank band values back onto the linear bin axis.

    m_mag_mel: 2D array, one row per frame and one column per band.
    v_bins_warp: Mapping from input bins to output (monotonically crescent
                 from 0 to any positive number). Its size defines the number
                 of output bins. If wanted, use build_mel_curve(...) to
                 construct it.
    interp_kind: 'kind' argument given to scipy's interp1d, both for placing
                 the band centres and for interpolating the band values.

    Returns m_mag: array of shape (number of frames, v_bins_warp.size).
    '''
    nfrms, n_melbands = m_mag_mel.shape
    n_bins = v_bins_warp.size

    # Bands gen: centres equally spaced on the warped axis.
    maxval = v_bins_warp[-1]
    v_cntrs_mel = np.linspace(0, maxval, n_melbands)

    # To linear frequency: invert the warping to get the centre of each band
    # as a (rounded) linear bin index.
    f_warp_to_bin = interpolate.interp1d(v_bins_warp, np.arange(n_bins),
                                         kind=interp_kind)
    v_cntrs = lu.round_to_int(f_warp_to_bin(v_cntrs_mel))

    # All frames share the same abscissae, so a single interpolator along
    # axis 1 replaces the per-frame loop. The result is copied into a float
    # array to keep the output dtype independent of the input dtype.
    m_mag = np.zeros((nfrms, n_bins))
    if nfrms > 0:
        f_interp = interpolate.interp1d(v_cntrs, m_mag_mel, kind=interp_kind,
                                        axis=1)
        m_mag[:] = f_interp(np.arange(n_bins))

    return m_mag
def read_reaper_est_file(est_file, check_len_smpls=-1, fs=-1, skiprows=7, usecols=(0, 1)):
    '''
    Reads a REAPER .est text file and returns the mark times together with
    their voicing flags.

    est_file: File (path or file-like) readable by np.loadtxt.
    check_len_smpls: If > 0, signal length in samples; when the last mark
                     falls at or beyond check_len_smpls - 1, all marks at or
                     beyond that position are dropped.
    fs: Sampling rate. Required whenever check_len_smpls is given.
    skiprows: Number of header lines to skip.
    usecols: The two columns to read: (time in seconds, voicing flag).

    Returns (v_pm_sec, v_voi). Both are empty arrays if the file contains
    no data rows.

    Raises ValueError if check_len_smpls is given without fs.
    '''
    # Checking input params:
    if (check_len_smpls > 0) and (fs == -1):
        raise ValueError(
            'If check_len_smpls given, fs must be provided as well.')

    # Read text: TODO: improve skiprows
    m_data = np.loadtxt(est_file, skiprows=skiprows, usecols=usecols)

    # A file with a header but no rows yields an empty array; indexing its
    # columns below would fail, so return empty results instead.
    if m_data.size == 0:
        return np.zeros(0), np.zeros(0)

    # A single data row comes back as 1D; force (rows, cols).
    m_data = np.atleast_2d(m_data)
    v_pm_sec = m_data[:, 0]
    v_voi = m_data[:, 1]

    # Protection against REAPER bugs 1: drop every mark whose time is not
    # greater than the one right before it (the first mark is always kept).
    vb_correct = np.hstack((True, np.diff(v_pm_sec) > 0))
    v_pm_sec = v_pm_sec[vb_correct]
    v_voi = v_voi[vb_correct]

    # Protection against REAPER bugs 2 (maybe it needs a better protection):
    if check_len_smpls > 0:
        v_pm_smpls = lu.round_to_int(v_pm_sec * fs)
        if v_pm_smpls[-1] >= (check_len_smpls - 1):
            vb_correct_2 = v_pm_smpls < (check_len_smpls - 1)
            v_pm_sec = v_pm_sec[vb_correct_2]
            v_voi = v_voi[vb_correct_2]

    return v_pm_sec, v_voi
def gen_wider_window(func_win, len_l, len_r, flat_to_len_ratio):
    '''
    Builds a window with a fade-in, a flat top of ones and a fade-out.

    func_win: Window generator called as func_win(length), e.g. np.hanning.
    len_l, len_r: Lengths of the left and right sides (rounded to int).
    flat_to_len_ratio: Share of each side that is flat; the remaining
                       (1 - flat_to_len_ratio) share is used for the fade.

    Returns the window, of length round(len_l) + round(len_r).
    '''
    n_left = lu.round_to_int(len_l)
    n_right = lu.round_to_int(len_r)

    # Number of samples spent fading on each side:
    fade_ratio = 1 - flat_to_len_ratio
    n_fade_in = lu.round_to_int(fade_ratio * n_left)
    n_fade_out = lu.round_to_int(fade_ratio * n_right)

    # Rising part: samples before the centre of an odd-length window.
    v_fade_in = func_win(2 * n_fade_in + 1)[:n_fade_in]

    # Falling part: samples after the centre of an odd-length window.
    v_fade_out = func_win(2 * n_fade_out + 1)[n_fade_out + 1:]

    # Whatever is not used for fading is flat:
    n_flat = (n_left + n_right) - (n_fade_in + n_fade_out)

    return np.hstack((v_fade_in, np.ones(n_flat), v_fade_out))
def apply_fbank(m_mag, v_bins_warp, nbands, win_func=np.hanning, mode='average'):
    '''
    Applies a filter bank whose band centres are equally spaced on a warped
    frequency axis.

    m_mag: 2D array, one row per frame and one column per input bin.
    v_bins_warp: Mapping from input bins to output (monotonically crescent
                 from 0 to any positive number). Requirement:
                 length = m_mag.shape[1]. If wanted, use
                 build_mel_curve(...) to construct it.
    nbands: number of output bands.
    win_func: Window generator forwarded to gen_non_symmetric_win(...).
    mode: 'average' -> each band is the dot product of the frame with the
                       band window.
          'maxabs'  -> each band takes the input value at the bin where the
                       windowed frame has the largest absolute value.

    Returns (m_mag_mel, v_winlen): the band values, shape (frames, nbands),
    and the window length used for each band.

    Raises ValueError if mode is not one of the supported values.
    '''
    # Fail early: previously an unknown mode ran the whole function and then
    # died with an UnboundLocalError at the return statement.
    if mode not in ('average', 'maxabs'):
        raise ValueError("mode must be 'average' or 'maxabs', got %r" % (mode,))

    nfrms, nbins = m_mag.shape

    # Bands gen:
    maxval = v_bins_warp[-1]
    v_cntrs_mel = np.linspace(0, maxval, nbands)

    # To linear frequency:
    f_interp = interpolate.interp1d(v_bins_warp, np.arange(nbins),
                                    kind='quadratic')
    v_cntrs = lu.round_to_int(f_interp(v_cntrs_mel))

    # Build filter bank: one non-symmetric window per band, reaching from the
    # previous centre to the next one. The first and last centres are
    # repeated so that the outermost bands get a zero-length outer side.
    m_fbank = np.zeros((nbins, nbands))
    v_cntrs_ext = np.r_[v_cntrs[0], v_cntrs, v_cntrs[-1]]
    v_winlen = np.zeros(nbands)
    for nxb in range(1, nbands + 1):
        winlen_l = v_cntrs_ext[nxb] - v_cntrs_ext[nxb - 1]
        winlen_r = v_cntrs_ext[nxb + 1] - v_cntrs_ext[nxb]
        v_win = gen_non_symmetric_win(winlen_l, winlen_r, win_func=win_func,
                                      b_norm=True)
        winlen = v_win.size
        v_winlen[nxb - 1] = winlen
        nx_strt = v_cntrs_ext[nxb - 1]
        m_fbank[nx_strt:(nx_strt + winlen), nxb - 1] = v_win

    # Apply filterbank:
    if mode == 'average':
        m_mag_mel = np.dot(m_mag, m_fbank)
    else:  # 'maxabs'
        m_mag_mel = np.zeros((nfrms, nbands))
        for nxf in range(nfrms):
            v_mag = m_mag[nxf, :]
            m_filtered = v_mag[:, None] * m_fbank
            v_nx_max = np.argmax(np.abs(m_filtered), axis=0)
            m_mag_mel[nxf, :] = v_mag[v_nx_max]

    return m_mag_mel, v_winlen
def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Mixes two spectra so that the output follows m_sp_l at low frequencies
    and m_sp_r at high frequencies, with a Hanning-shaped transition of
    width bw centred at cut_off.

    m_sp_l, m_sp_r: 2D arrays whose axis 1 holds the frequency bins
                    (shape[1] is interpreted as nFFT/2 + 1).
    cut_off, bw: Centre and width of the transition. Both go through
                 hz_to_bin(...), so presumably Hz (TODO confirm).
    fs: Sampling rate passed to hz_to_bin(...).
    freq_scale: Ignored by the current implementation.

    Returns the mixed spectrum.
    '''
    # Hz to bin. 'bw / 2.0' avoids integer truncation of an odd integer bw.
    nFFThalf = m_sp_l.shape[1]
    nFFT = (nFFThalf - 1) * 2
    edge_lo = cut_off - bw / 2.0
    edge_hi = cut_off + bw / 2.0
    bin_l = lu.round_to_int(hz_to_bin(edge_lo, nFFT, fs))
    bin_r = lu.round_to_int(hz_to_bin(edge_hi, nFFT, fs))

    # Left weight: ones, then the falling Hanning half, then zeros.
    bw_bin = bin_r - bin_l
    v_win_shrt = np.hanning(2 * bw_bin + 1)
    v_win_l = np.hstack((np.ones(bin_l),
                         v_win_shrt[bw_bin:],
                         np.zeros(nFFThalf - bin_r - 1)))

    # Right weight: complement of the left one. For bw_bin > 0 this matches
    # the rising Hanning half; for bw_bin == 0 (np.hanning(1) is [1.]) it
    # keeps the crossover bin from receiving both inputs at full weight.
    v_win_r = 1.0 - v_win_l

    # Apply windows:
    m_sp = m_sp_l * v_win_l[None, :]
    m_sp = m_sp + m_sp_r * v_win_r[None, :]
    return m_sp
def rceps_spectral_smoothing(m_sp, in_type='splog', nc_total=60, fade_to_total=0.2):
    '''
    Smooths spectra by truncating (with a fade-out) their minimum-phase real
    cepstrum and compensating for the energy removed.

    m_sp: Input spectra, handed to rceps(...) together with in_type.
    in_type: Forwarded to rceps(...).
    nc_total: Number of cepstral coefficients kept.
    fade_to_total: Share of nc_total, at its upper end, that is faded out
                   with the decaying side of a Hanning window.

    Returns the smoothed spectra, after remove_hermitian_half(...).
    '''
    n_fade = lu.round_to_int(fade_to_total * nc_total)

    # Cepstrum and its minimum-phase version:
    m_ceps = rceps(m_sp, in_type=in_type)
    m_ceps_mp = rceps_to_min_phase(m_ceps)

    # RMS per row before liftering:
    v_rms_before = np.sqrt(np.mean(m_ceps_mp**2, axis=1))

    # Decaying Hanning side with n_fade points (the window end point is
    # skipped):
    v_fade = np.hanning(2 * n_fade + 3)[n_fade + 2:-1]

    # Liftering, done in place: zero everything from nc_total on and fade the
    # last n_fade kept coefficients.
    m_ceps_mp[:, nc_total:] = 0
    fade_strt = nc_total - n_fade
    m_ceps_mp[:, fade_strt:nc_total] *= v_fade

    # Energy compensation: bring each row back to its original RMS.
    v_rms_after = np.sqrt(np.mean(m_ceps_mp**2, axis=1))
    v_gain = v_rms_before / v_rms_after
    m_ceps_mp = m_ceps_mp * v_gain[:, None]

    # Back to spectrum:
    n_fft = m_ceps.shape[1]
    m_sp_full = np.fft.fft(m_ceps_mp, n=n_fft).real
    return remove_hermitian_half(m_sp_full)
def spectral_smoothing_rceps(m_sp_log, nc_total=60, fade_to_total=0.2):
    '''
    Smooths log spectra by liftering their minimum-phase real cepstrum.

    m_sp_log: Log spectra; could be in any base log or decibels.
    nc_total: Number of cepstral coefficients kept.
    fade_to_total: Share of nc_total, at its upper end, that is faded out
                   with the decaying side of a Hanning window.

    Returns the smoothed log spectra (no energy compensation is applied).
    '''
    n_fade = lu.round_to_int(fade_to_total * nc_total)

    # Full (hermitian-extended) spectrum -> real cepstrum -> min phase:
    m_ceps = np.fft.ifft(add_hermitian_half(m_sp_log)).real
    m_ceps_mp = rceps_to_min_phase_rceps(m_ceps)

    # Decaying Hanning side with n_fade points (the window end point is
    # skipped):
    v_fade = np.hanning(2 * n_fade + 3)[n_fade + 2:-1]

    # Liftering, done in place: drop everything from nc_total on and fade the
    # last n_fade kept coefficients.
    m_ceps_mp[:, nc_total:] = 0
    fade_strt = nc_total - n_fade
    m_ceps_mp[:, fade_strt:nc_total] *= v_fade

    # Back to (half) spectrum:
    n_fft = m_ceps.shape[1]
    m_sp_full = np.fft.fft(m_ceps_mp, n=n_fft).real
    return remove_hermitian_half(m_sp_full)
def spectral_smoothing_rceps(m_sp_log, nc_total=60, fade_to_total=0.2):
    '''
    Cepstral smoothing of log spectra.

    The input is extended with its hermitian half, taken to the real
    cepstrum, converted to minimum phase, truncated to nc_total coefficients
    (the last fade_to_total * nc_total of them faded out) and transformed
    back. No energy compensation is applied.

    m_sp_log could be in any base log or decibels.
    '''
    nc_fade = lu.round_to_int(fade_to_total * nc_total)

    # Getting the min-phase cepstrum:
    m_sp_log_full = add_hermitian_half(m_sp_log)
    m_rceps = np.fft.ifft(m_sp_log_full).real
    m_lifted = rceps_to_min_phase_rceps(m_rceps)

    # Fade-out: decaying side of a Hanning window, nc_fade points long.
    v_win_full = np.hanning(2 * nc_fade + 3)
    v_fade_out = v_win_full[nc_fade + 2:-1]

    # Windowing (in place):
    sl_removed = slice(nc_total, None)
    sl_faded = slice(nc_total - nc_fade, nc_total)
    m_lifted[:, sl_removed] = 0
    m_lifted[:, sl_faded] *= v_fade_out

    # Go back to spectrum:
    m_sp_log_sm = np.fft.fft(m_lifted, n=m_rceps.shape[1]).real
    m_sp_log_sm = remove_hermitian_half(m_sp_log_sm)
    return m_sp_log_sm
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    '''
    Work-in-progress version (see the "NOT FINISHED" mark in the body).

    Builds nframes interpolated frames between two recordings and splices
    them between the start of A and the end of B:
        frames [0, nx_strt_a) of A + nframes interpolated frames +
        frames [nx_strt_b + nframes, end) of B.

    Formants are extracted once, at two anchor frames, and mapped between A
    and B. For every interpolated frame the true envelopes of the current
    frames are warped towards each other and mixed with weight sp_weight
    (0 at the first frame, 1 at the last one); the anchors' short frames are
    then filtered towards that target envelope and mixed in the time domain.

    wavfile_a, wavfile_b: Audio files, read with mp.analysis_lossless(...)
                          and la.read_audio_file(...).
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in A and B.
    nframes: Number of frames to interpolate.
    fft_len: FFT length; also the length of each synthesised frame.

    Returns (v_sig_merged, fs).
    '''

    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Get formants:
    # NOTE(review): the anchor frame of B is indexed with nx_strt_a (not
    # nx_strt_b) plus nframes - confirm this is intended.
    v_mag_db_a_dummy, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a, v_frmnts_bw_a, v_frm_short_a, shift_a = get_formant_locations_from_raw_long_frame(v_sig_a, v_pm_a, nx_strt_a, fft_len)
    v_mag_db_b_dummy, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b, v_frmnts_bw_b, v_frm_short_b, shift_b = get_formant_locations_from_raw_long_frame(v_sig_b, v_pm_b, nx_strt_a+nframes, fft_len)

    # Formant mapping:----------------------------------------------------------------
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(v_frmnts_bins_a, v_frmnts_gains_db_a, v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

    v_shifts_syn = np.zeros(nframes, dtype='int')
    m_frms_syn = np.zeros((nframes, fft_len))
    for nx_frm in xrange(nframes):

        # Interpolation weight: 0 at the first frame, 1 at the last one.
        # NOTE(review): divides by zero when nframes == 1.
        sp_weight = nx_frm / (nframes-1.0)

        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Computing mag spectrum:
        v_mag_db_a = get_formant_locations_from_raw_long_frame(v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_b = get_formant_locations_from_raw_long_frame(v_sig_b, v_pm_b, nx_b, fft_len)[0]

        # NOT FINISHED !!

        # Warping:---------------------------------------------------------------------
        # True envelope:
        v_true_env_db_a = la.true_envelope(v_mag_db_a[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(v_mag_db_b[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]

        # A is warped towards B by sp_weight and B towards A by the
        # complementary amount.
        v_sp_env_db_a_warp = warp_mag_spec(v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt, fft_len, sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt, fft_len, (1-sp_weight))

        #v_sp_env_db_a_warp = warp_mag_spec(v_lpc_mag_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt, fft_len, sp_weight)
        #v_sp_env_db_b_warp = warp_mag_spec(v_lpc_mag_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt, fft_len, (1-sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_targ = v_sp_env_db_a_warp * (1.0-sp_weight) + v_sp_env_db_b_warp * sp_weight

        # Source mix:------------------------------------------------------------------
        # Difference (in dB) between the target envelope and each current envelope.
        v_spec_diff_db_a = v_sp_env_db_targ - v_true_env_db_a
        v_spec_diff_db_b = v_sp_env_db_targ - v_true_env_db_b

        # Filtering (FFT filter):
        # NOTE(review): v_frm_short_a/b and shift_a/b come from the anchor
        # frames computed before the loop, not from frames nx_a / nx_b -
        # presumably part of what is not finished.
        v_frm_short_a_ext_filt = fft_filter(v_frm_short_a, shift_a, v_spec_diff_db_a, fft_len)
        v_frm_short_b_ext_filt = fft_filter(v_frm_short_b, shift_b, v_spec_diff_db_b, fft_len)

        # Mix signal:
        v_frm_short_ext_filt = v_frm_short_a_ext_filt * (1.0-sp_weight) + v_frm_short_b_ext_filt * sp_weight
        #v_frm_short_ext_filt = v_frm_short_a_ext_filt

        # Mix shifts:
        shift_mix = lu.round_to_int(shift_a * (1.0-sp_weight) + shift_b * sp_weight)

        # Save:
        v_shifts_syn[nx_frm] = shift_mix
        m_frms_syn[nx_frm, :] = v_frm_short_ext_filt

        # Debug plots (disabled):
        if False:
            plt.figure(); plt.plot(v_frm_short_a_ext_filt); plt.plot(v_frm_short_b_ext_filt); plt.grid(); plt.show()
            plt.figure(); plt.plot(v_frm_short_a_ext_filt); plt.plot(v_frm_short_b_ext_filt); plt.plot(v_frm_short_ext_filt); plt.grid(); plt.show()

    # Merge:
    # The synthesised frames are fftshift-ed along the time axis before the
    # FFT, and only the non-redundant half of the spectrum is kept.
    m_frms_syn_dc = np.fft.fftshift(m_frms_syn, axes=1)
    m_fft_syn = la.remove_hermitian_half(np.fft.fft(m_frms_syn_dc))
    m_mag_syn, m_real_syn, m_imag_syn = compute_lossless_spec_feats(m_fft_syn)

    # Head of A + interpolated frames + tail of B:
    m_mag_merged = np.vstack((m_mag_a[:nx_strt_a,:] , m_mag_syn , m_mag_b[(nx_strt_b+nframes):,:]))
    m_real_merged = np.vstack((m_real_a[:nx_strt_a,:] , m_real_syn , m_real_b[(nx_strt_b+nframes):,:]))
    m_imag_merged = np.vstack((m_imag_a[:nx_strt_a,:] , m_imag_syn , m_imag_b[(nx_strt_b+nframes):,:]))
    v_shift_merged = np.r_[ v_shift_a[:nx_strt_a] , v_shifts_syn , v_shift_b[(nx_strt_b+nframes):] ]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged, m_imag_merged, v_shift_merged)

    return v_sig_merged, fs
def get_formant_locations_from_raw_long_frame(wavfile, nx, fft_len):
    '''
    Analyses one frame of an audio file and locates formant candidates on
    its LPC spectral envelope.

    The "long" frame spans from epoch nx - 2 to epoch nx + 2 and is used for
    the spectrum and the LPC analysis; the "short" frame spans from epoch
    nx - 1 to epoch nx + 1 and is returned windowed.

    wavfile: Audio file, read with la.read_audio_file(...) and analysed with
             la.reaper_epoch_detection(...).
    nx: frame index (index into the detected epochs). Two epochs are needed
        on each side of it.
    fft_len: FFT length used for the magnitude and LPC spectra.

    Returns:
        v_mag_db: Magnitude spectrum (dB) of the windowed long frame, after
                  la.remove_hermitian_half(...).
        v_lpc_mag_db: LPC magnitude spectrum (dB), shifted so that its mean
                      equals the mean of v_mag_db.
        v_frmnts_bins: Bins of the formant candidates kept.
        v_frmnts_gains_db: Their gains (dB).
        v_frmnts_bw: Their bandwidth measure (see below).
        v_frm_short_win: Windowed short frame.
        shift: Distance in samples between epochs nx - 1 and nx.

    Raises IndexError if nx < 2 (a negative index would silently wrap around
    to the end of the epoch list) or if epoch nx + 2 does not exist.
    '''
    if nx < 2:
        raise IndexError('nx must be >= 2: two epochs are needed on the left '
                         'of the frame.')

    v_sig, fs = la.read_audio_file(wavfile)

    # Epoch detection:
    v_pm_sec, v_voi = la.reaper_epoch_detection(wavfile)
    v_pm = lu.round_to_int(v_pm_sec * fs)

    # Raw-long Frame extraction:
    v_frm_long = v_sig[v_pm[nx - 2]:v_pm[nx + 2] + 1]

    # Win:
    left_len = v_pm[nx] - v_pm[nx - 2]
    right_len = v_pm[nx + 2] - v_pm[nx]
    v_win = la.gen_non_symmetric_win(left_len, right_len, np.hanning,
                                     b_norm=False)
    v_frm_long_win = v_frm_long * v_win

    # Spectrum:
    v_mag = np.absolute(np.fft.fft(v_frm_long_win, n=fft_len))
    v_mag_db = la.db(la.remove_hermitian_half(v_mag[None, :])[0])

    # Formant extraction -LPC method:--------------------------------------------------
    v_lpc, v_e, v_refl = lpc(v_frm_long_win, 120)
    v_lpc_mag = lpc_to_mag(v_lpc, fft_len=fft_len)
    v_lpc_mag_db = la.db(v_lpc_mag)

    # Align the mean level of the LPC envelope with the one of the spectrum:
    v_lpc_mag_db = v_lpc_mag_db - np.mean(v_lpc_mag_db) + np.mean(v_mag_db)

    v_frmnts_bins, v_frmnts_gains_db = get_formant_locations_from_spec_env(
        v_lpc_mag_db)

    # Getting bandwidth:
    # Valleys are located by running the same detector on the negated
    # envelope; bin 0 and the last bin are added as outer valleys.
    fft_len_half = 1 + fft_len // 2
    v_vall_bins = get_formant_locations_from_spec_env(-v_lpc_mag_db)[0]
    v_vall_bins = np.r_[0, v_vall_bins, fft_len_half - 1]

    nfrmnts = v_frmnts_bins.size
    v_frmnts_bw = np.zeros(nfrmnts) - 1.0
    for nx_f in range(nfrmnts):
        curr_frmnt_bin = v_frmnts_bins[nx_f]
        curr_vall_l_bin = v_vall_bins[nx_f]
        curr_vall_r_bin = v_vall_bins[nx_f + 1]

        # Points halfway between the peak and the valley on each side:
        curr_midp_l = int((curr_frmnt_bin + curr_vall_l_bin) / 2.0)
        curr_midp_r = int((curr_frmnt_bin + curr_vall_r_bin) / 2.0)

        # Slopes (dB per bin) from each midpoint to the peak:
        slope_l = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_l]) / (
            v_frmnts_bins[nx_f] - curr_midp_l).astype(float)
        slope_r = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_r]) / (
            v_frmnts_bins[nx_f] - curr_midp_r).astype(float)

        # The bandwidth measure is the inverse of the mean slope magnitude
        # (slope_r enters with a minus sign), so steep peaks give low values.
        slope_ave = (slope_l - slope_r) / 2.0
        v_frmnts_bw[nx_f] = 1.0 / slope_ave

    # Filtering by bandwidth: keep only the candidates below the threshold.
    bw_thress = 7.0
    vb_keep = v_frmnts_bw < bw_thress
    v_frmnts_bins = v_frmnts_bins[vb_keep]
    v_frmnts_gains_db = v_frmnts_gains_db[vb_keep]
    v_frmnts_bw = v_frmnts_bw[vb_keep]

    # Computing frame short:--------------------------------
    # Win:
    left_len_short = v_pm[nx] - v_pm[nx - 1]
    right_len_short = v_pm[nx + 1] - v_pm[nx]
    v_win_short = la.gen_non_symmetric_win(left_len_short, right_len_short,
                                           np.hanning, b_norm=False)
    v_frm_short = v_sig[v_pm[nx - 1]:v_pm[nx + 1] + 1]
    v_frm_short_win = v_frm_short * v_win_short
    shift = v_pm[nx] - v_pm[nx - 1]

    # Formant extraction - True envelope method:----------------------------------------
    # Not finished.
    #v_true_env_db = la.true_envelope(v_mag_db[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]

    # Debug plot (disabled):
    if False:
        plt.figure()
        plt.plot(v_mag_db)
        plt.plot(v_lpc_mag_db)
        plt.grid()
        plt.show()

    return v_mag_db, v_lpc_mag_db, v_frmnts_bins, v_frmnts_gains_db, v_frmnts_bw, v_frm_short_win, shift
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    '''
    Interpolates nframes frames between two recordings and splices them
    between the start of A and the end of B:
        frames [0, nx_strt_a) of A + nframes interpolated frames +
        frames [nx_strt_b + nframes, end) of B.

    Formants and true envelopes are extracted once, at two anchor frames
    (frame nx_strt_a of A and frame nx_strt_b + nframes - 1 of B). For each
    interpolated frame the anchor envelopes are warped towards each other
    and mixed into a target envelope; the current frames of A and B are
    divided by their own true envelope, multiplied by the target envelope,
    and mixed. The weight goes from 0 (only A) at the first frame to
    1 (only B) at the last one.

    wavfile_a, wavfile_b: Audio files, read with mp.analysis_lossless(...)
                          and la.read_audio_file(...).
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in A and B.
    nframes: Number of frames to interpolate. With nframes == 1 the single
             frame gets weight 0.
    fft_len: FFT length (integer).

    Returns (v_sig_merged, fs).
    '''
    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(
        wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(
        wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Get formants (anchor frames only):
    (v_mag_db_a, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a,
     v_frmnts_bw_a, v_frm_short_a, shift_a) = get_formant_locations_from_raw_long_frame(
        v_sig_a, v_pm_a, nx_strt_a, fft_len)
    (v_mag_db_b, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b,
     v_frmnts_bw_b, v_frm_short_b, shift_b) = get_formant_locations_from_raw_long_frame(
        v_sig_b, v_pm_b, nx_strt_b + nframes - 1, fft_len)

    # Formant mapping:----------------------------------------------------------------
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
        v_frmnts_bins_a, v_frmnts_gains_db_a,
        v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

    # spec envelope anchors:---------------------------------------------------------------------
    v_true_env_db_a = la.true_envelope(v_mag_db_a[None, :], in_type='db',
                                       ncoeffs=400, thres_db=0.1)[0]
    v_true_env_db_b = la.true_envelope(v_mag_db_b[None, :], in_type='db',
                                       ncoeffs=400, thres_db=0.1)[0]

    # Debug plots (disabled):
    if False:
        plt.figure()
        plt.plot(v_mag_db_a)
        plt.plot(v_true_env_db_a)
        plt.grid()
        plt.show()
        plt.figure()
        plt.plot(v_mag_db_b)
        plt.plot(v_true_env_db_b)
        plt.grid()
        plt.show()

    # Integer division: the result is used as an array dimension.
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    # Denominator of the interpolation weight. Clamped to 1 so that a single
    # interpolated frame gets weight 0 instead of dividing by zero.
    weight_den = max(nframes - 1.0, 1.0)

    for nx_frm in range(nframes):
        sp_weight = nx_frm / weight_den
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Spectral Warping: A's anchor envelope is warped towards B by
        # sp_weight, and B's towards A by the complementary amount.
        v_sp_env_db_curr_a_warp = warp_mag_spec(
            v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt,
            fft_len, sp_weight)
        v_sp_env_db_curr_b_warp = warp_mag_spec(
            v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt,
            fft_len, (1 - sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_curr_targ = (v_sp_env_db_curr_a_warp * (1.0 - sp_weight)
                                 + v_sp_env_db_curr_b_warp * sp_weight)

        # Whitening:-----------------------------------------------------------------------------
        # Spectral envelope estimation of the current frames:
        v_mag_db_curr_a = get_formant_locations_from_raw_long_frame(
            v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_curr_b = get_formant_locations_from_raw_long_frame(
            v_sig_b, v_pm_b, nx_b, fft_len)[0]

        v_true_env_db_curr_a = la.true_envelope(
            v_mag_db_curr_a[None, :], in_type='db', ncoeffs=400,
            thres_db=0.1)[0]
        v_true_env_db_curr_b = la.true_envelope(
            v_mag_db_curr_b[None, :], in_type='db', ncoeffs=400,
            thres_db=0.1)[0]

        # Divide each magnitude by its own envelope (la.db(..., b_inv=True)
        # presumably converts dB back to linear magnitude - TODO confirm).
        v_mag_white_a = m_mag_a[nx_a, :] / la.db(v_true_env_db_curr_a,
                                                 b_inv=True)
        v_mag_white_b = m_mag_b[nx_b, :] / la.db(v_true_env_db_curr_b,
                                                 b_inv=True)

        # Debug plots (disabled):
        if False:
            plt.figure()
            plt.plot(v_mag_db_curr_a)
            plt.plot(v_true_env_db_curr_a)
            plt.grid()
            plt.show()
            plt.figure()
            plt.plot(v_true_env_db_curr_a)
            plt.plot(v_true_env_db_curr_b)
            plt.plot(v_sp_env_db_curr_targ)
            plt.grid()
            plt.show()
            plt.figure()
            plt.plot(v_mag_db_curr_a)
            plt.plot(la.db(m_mag_a[nx_a, :]))
            plt.plot(la.db(v_mag_white_a))
            plt.plot(v_true_env_db_curr_a)
            plt.grid()
            plt.show()

        # Impose spectral Env:------------------------------------------------------------------
        v_sp_env_targ = la.db(v_sp_env_db_curr_targ, b_inv=True)
        v_mag_filt_a = v_mag_white_a * v_sp_env_targ
        v_mag_filt_b = v_mag_white_b * v_sp_env_targ

        # Mix Sources:------------------------------------------------------------------
        v_mag_mix = v_mag_filt_a * (1.0 - sp_weight) + v_mag_filt_b * sp_weight
        v_real_mix = (m_real_a[nx_a, :] * (1.0 - sp_weight)
                      + m_real_b[nx_b, :] * sp_weight)
        v_imag_mix = (m_imag_a[nx_a, :] * (1.0 - sp_weight)
                      + m_imag_b[nx_b, :] * sp_weight)

        # Mix shifts (shift_a and shift_b are the anchors' shifts):
        shift_mix = lu.round_to_int(shift_a * (1.0 - sp_weight)
                                    + shift_b * sp_weight)

        # Save:
        v_shifts_interp[nx_frm] = shift_mix
        m_mag_interp[nx_frm, :] = v_mag_mix
        m_real_interp[nx_frm, :] = v_real_mix
        m_imag_interp[nx_frm, :] = v_imag_mix

    # Merge: head of A + interpolated frames + tail of B.
    nx_tail_b = nx_strt_b + nframes
    m_mag_merged = np.vstack((m_mag_a[:nx_strt_a, :], m_mag_interp,
                              m_mag_b[nx_tail_b:, :]))
    m_real_merged = np.vstack((m_real_a[:nx_strt_a, :], m_real_interp,
                               m_real_b[nx_tail_b:, :]))
    m_imag_merged = np.vstack((m_imag_a[:nx_strt_a, :], m_imag_interp,
                               m_imag_b[nx_tail_b:, :]))
    v_shift_merged = np.r_[v_shift_a[:nx_strt_a], v_shifts_interp,
                           v_shift_b[nx_tail_b:]]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged,
                                           m_imag_merged, v_shift_merged)
    return v_sig_merged, fs
def speech_interp(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    '''
    Interpolates nframes frames between two recordings and splices them
    between the start of A and the end of B:
        frames [0, nx_strt_a) of A + nframes interpolated frames +
        frames [nx_strt_b + nframes, end) of B.

    For every interpolated frame, formants and true envelopes are extracted
    from the current frames of A and B. The envelopes are warped towards
    each other and mixed into a target envelope; each magnitude is then
    scaled by the difference (in dB) between the target and its own
    envelope, and the results are mixed. The weight goes from 0 (only A) at
    the first frame to 1 (only B) at the last one.

    wavfile_a, wavfile_b: Audio files, read with mp.analysis_lossless(...)
                          and la.read_audio_file(...).
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in A and B.
    nframes: Number of frames to interpolate. With nframes == 1 the single
             frame gets weight 0.
    fft_len: FFT length (integer).

    Returns (v_sig_merged, fs).
    '''
    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(
        wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(
        wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Integer division: the result is used as an array dimension.
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    # Denominator of the interpolation weight. Clamped to 1 so that a single
    # interpolated frame gets weight 0 instead of dividing by zero.
    weight_den = max(nframes - 1.0, 1.0)

    for nx_frm in range(nframes):
        sp_weight = nx_frm / weight_den
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Get formants (current frames):
        (v_mag_db_a, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a,
         v_frmnts_bw_a, v_frm_short_a, shift_a) = get_formant_locations_from_raw_long_frame(
            v_sig_a, v_pm_a, nx_a, fft_len)
        (v_mag_db_b, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b,
         v_frmnts_bw_b, v_frm_short_b, shift_b) = get_formant_locations_from_raw_long_frame(
            v_sig_b, v_pm_b, nx_b, fft_len)

        # Formant mapping:----------------------------------------------------------------
        v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
            v_frmnts_bins_a, v_frmnts_gains_db_a,
            v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

        # Warping:---------------------------------------------------------------------
        # True envelope:
        v_true_env_db_a = la.true_envelope(v_mag_db_a[None, :], in_type='db',
                                           ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(v_mag_db_b[None, :], in_type='db',
                                           ncoeffs=400, thres_db=0.1)[0]

        # A is warped towards B by sp_weight and B towards A by the
        # complementary amount.
        v_sp_env_db_a_warp = warp_mag_spec(
            v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt,
            fft_len, sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(
            v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt,
            fft_len, (1 - sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_targ = (v_sp_env_db_a_warp * (1.0 - sp_weight)
                            + v_sp_env_db_b_warp * sp_weight)

        # Impose spectral Env (FFT filter):------------------------------------------------------
        # Gain (in dB) that takes each envelope to the target one.
        v_sp_env_diff_db_a = v_sp_env_db_targ - v_true_env_db_a
        v_sp_env_diff_db_b = v_sp_env_db_targ - v_true_env_db_b

        # la.db(..., b_inv=True) presumably converts the dB gain to a linear
        # factor - TODO confirm.
        v_mag_filt_a = m_mag_a[nx_a, :] * la.db(v_sp_env_diff_db_a, b_inv=True)
        v_mag_filt_b = m_mag_b[nx_b, :] * la.db(v_sp_env_diff_db_b, b_inv=True)

        # Mix Sources:------------------------------------------------------------------
        v_mag_mix = v_mag_filt_a * (1.0 - sp_weight) + v_mag_filt_b * sp_weight
        v_real_mix = (m_real_a[nx_a, :] * (1.0 - sp_weight)
                      + m_real_b[nx_b, :] * sp_weight)
        v_imag_mix = (m_imag_a[nx_a, :] * (1.0 - sp_weight)
                      + m_imag_b[nx_b, :] * sp_weight)

        # Mix shifts:
        shift_mix = lu.round_to_int(shift_a * (1.0 - sp_weight)
                                    + shift_b * sp_weight)

        # Save:
        v_shifts_interp[nx_frm] = shift_mix
        m_mag_interp[nx_frm, :] = v_mag_mix
        m_real_interp[nx_frm, :] = v_real_mix
        m_imag_interp[nx_frm, :] = v_imag_mix

    # Merge: head of A + interpolated frames + tail of B.
    nx_tail_b = nx_strt_b + nframes
    m_mag_merged = np.vstack((m_mag_a[:nx_strt_a, :], m_mag_interp,
                              m_mag_b[nx_tail_b:, :]))
    m_real_merged = np.vstack((m_real_a[:nx_strt_a, :], m_real_interp,
                               m_real_b[nx_tail_b:, :]))
    m_imag_merged = np.vstack((m_imag_a[:nx_strt_a, :], m_imag_interp,
                               m_imag_b[nx_tail_b:, :]))
    v_shift_merged = np.r_[v_shift_a[:nx_strt_a], v_shifts_interp,
                           v_shift_b[nx_tail_b:]]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged,
                                           m_imag_merged, v_shift_merged)
    return v_sig_merged, fs