Ejemplo n.º 1
0
def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Cross-fades two spectra along the frequency axis.

    Bins below the transition band are taken from m_sp_l, bins above it from
    m_sp_r, and inside the band both are mixed with complementary half-Hann
    weights that add up to one at every bin.

    m_sp_l, m_sp_r: 2D arrays with frequency bins along axis 1
                    (presumably nFFT/2 + 1 bins each; both are assumed to
                    have the same number of bins - TODO confirm).
    cut_off:        Centre of the transition band, converted with hz_to_bin().
    bw:             Width of the transition band, in the same unit as cut_off.
    fs:             Sampling rate handed to hz_to_bin().
    freq_scale:     Not used by this function; kept so existing calls still work.

    Returns: m_sp, the cross-faded spectrum.
    '''

    # Hz to bin:
    nFFThalf = m_sp_l.shape[1]
    nFFT = (nFFThalf - 1) * 2
    # Float division, so an odd integer bw is not truncated under Python 2:
    half_bw = bw / 2.0
    bin_l = lu.round_to_int(hz_to_bin(cut_off - half_bw, nFFT, fs))
    bin_r = lu.round_to_int(hz_to_bin(cut_off + half_bw, nFFT, fs))

    # Gen short windows:
    bw_bin = bin_r - bin_l
    if bw_bin == 0:
        # np.hanning(1) is [1.], which would give BOTH sides full gain at the
        # crossover bin (the spectra would be summed there). Split it evenly
        # instead, so the weights still add up to one.
        v_win_shrt_l = np.array([0.5])
        v_win_shrt_r = np.array([0.5])
    else:
        v_win_shrt = np.hanning(2 * bw_bin + 1)
        v_win_shrt_l = v_win_shrt[bw_bin:]  # falling half: 1 -> 0
        v_win_shrt_r = v_win_shrt[:bw_bin + 1]  # rising half:  0 -> 1

    # Gen long windows (each one is nFFThalf bins long):
    n_bins_above = nFFThalf - bin_r - 1
    v_win_l = np.hstack(
        (np.ones(bin_l), v_win_shrt_l, np.zeros(n_bins_above)))
    v_win_r = np.hstack(
        (np.zeros(bin_l), v_win_shrt_r, np.ones(n_bins_above)))

    # Apply windows:
    m_sp_l_win = m_sp_l * v_win_l[None, :]
    m_sp_r_win = m_sp_r * v_win_r[None, :]
    m_sp = m_sp_l_win + m_sp_r_win

    return m_sp
Ejemplo n.º 2
0
def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Joins two spectra with a smooth cross-over in frequency.

    The output follows m_sp_l below the transition band and m_sp_r above it.
    Inside the band the two are weighted by the falling and rising halves of
    a Hann window, which sum to one at every bin.

    m_sp_l, m_sp_r: 2D arrays, frequency bins along axis 1 (presumably
                    nFFT/2 + 1 bins; both assumed equally sized - TODO confirm).
    cut_off:        Centre of the transition band (converted by hz_to_bin()).
    bw:             Total width of the transition band, same unit as cut_off.
    fs:             Sampling rate passed on to hz_to_bin().
    freq_scale:     Unused here; retained for interface compatibility.

    Returns: m_sp, the combined spectrum.
    '''

    # Hz to bin:
    nFFThalf = m_sp_l.shape[1]
    nFFT = (nFFThalf - 1) * 2
    # 2.0 forces true division (an odd integer bw would be truncated in Python 2):
    bin_l = lu.round_to_int(hz_to_bin(cut_off - bw / 2.0, nFFT, fs))
    bin_r = lu.round_to_int(hz_to_bin(cut_off + bw / 2.0, nFFT, fs))

    # Gen short windows:
    bw_bin = bin_r - bin_l
    if bw_bin == 0:
        # Degenerate band of a single bin: np.hanning(1) == [1.] would weight
        # both inputs by 1 at that bin, adding them. Use 0.5 / 0.5 so that the
        # two windows remain complementary.
        v_win_shrt_l = np.array([0.5])
        v_win_shrt_r = np.array([0.5])
    else:
        v_win_shrt = np.hanning(2 * bw_bin + 1)
        v_win_shrt_l = v_win_shrt[bw_bin:]
        v_win_shrt_r = v_win_shrt[:bw_bin + 1]

    # Gen long windows:
    v_win_l = np.hstack(
        (np.ones(bin_l), v_win_shrt_l, np.zeros(nFFThalf - bin_r - 1)))
    v_win_r = np.hstack(
        (np.zeros(bin_l), v_win_shrt_r, np.ones(nFFThalf - bin_r - 1)))

    # Apply windows:
    m_sp_l_win = m_sp_l * v_win_l[None, :]
    m_sp_r_win = m_sp_r * v_win_r[None, :]
    m_sp = m_sp_l_win + m_sp_r_win

    return m_sp
Ejemplo n.º 3
0
def unwarp_from_fbank(m_mag_mel, v_bins_warp, interp_kind='quadratic'):
    '''
    Maps filter-bank (warped-frequency) magnitudes back onto linear frequency
    bins by interpolating each frame between the band centres.

    m_mag_mel:   2D array (nfrms x n_melbands) with one value per band.
    v_bins_warp: Mapping from linear bins to the warped scale (monotonically
                 increasing from 0 to any positive number). Its size sets the
                 number of output bins. If wanted, use build_mel_curve(...) to
                 construct it.
    interp_kind: Value of 'kind' given to interpolate.interp1d (presumably
                 scipy.interpolate) for both interpolation steps.

    Returns: m_mag, array (nfrms x v_bins_warp.size).
    '''

    nfrms, n_melbands = m_mag_mel.shape
    n_bins = v_bins_warp.size

    # Bands gen (centres equally spaced on the warped scale):
    maxval = v_bins_warp[-1]
    v_cntrs_mel = np.linspace(0, maxval, n_melbands)

    # To linear frequency (invert the warping curve at the band centres):
    f_interp = interpolate.interp1d(v_bins_warp,
                                    np.arange(n_bins),
                                    kind=interp_kind)
    v_cntrs = lu.round_to_int(f_interp(v_cntrs_mel))

    # Process per frame:
    v_bins = np.arange(n_bins)
    m_mag = np.zeros((nfrms, n_bins))
    # range() instead of xrange(), so this also runs under Python 3:
    for nxf in range(nfrms):
        f_interp = interpolate.interp1d(v_cntrs,
                                        m_mag_mel[nxf, :],
                                        kind=interp_kind)
        m_mag[nxf, :] = f_interp(v_bins)

    return m_mag
Ejemplo n.º 4
0
def read_reaper_est_file(est_file,
                         check_len_smpls=-1,
                         fs=-1,
                         skiprows=7,
                         usecols=(0, 1)):
    '''
    Reads pitch-mark times and voicing flags from a REAPER text (.est) file.

    est_file:        File (name or file object) handed to np.loadtxt.
    check_len_smpls: If > 0, length of the signal in samples; pitch marks
                     placed at or beyond the last sample are discarded when
                     the final mark is out of range. Requires fs.
    fs:              Sampling rate, used to convert seconds into samples.
    skiprows:        Number of leading (header) lines to skip.
    usecols:         The two columns to read: time in seconds, then voicing.

    Returns: (v_pm_sec, v_voi), two 1D arrays of equal length. Both are empty
             if the file holds no data rows.
    Raises:  ValueError if check_len_smpls is given without fs.
    '''

    # Checking input params:
    if (check_len_smpls > 0) and (fs == -1):
        raise ValueError(
            'If check_len_smpls given, fs must be provided as well.')

    # Read text: TODO: improve skiprows
    m_data = np.atleast_2d(
        np.loadtxt(est_file, skiprows=skiprows, usecols=usecols))

    # No data rows: nothing to protect against below, and indexing the
    # columns would fail with an unhelpful IndexError.
    if m_data.size == 0:
        return np.zeros(0), np.zeros(0)

    v_pm_sec = m_data[:, 0]
    v_voi = m_data[:, 1]

    # Protection against REAPER bugs 1:
    # drop every mark whose time does not increase w.r.t. the previous line.
    vb_correct = np.hstack((True, np.diff(v_pm_sec) > 0))
    v_pm_sec = v_pm_sec[vb_correct]
    v_voi = v_voi[vb_correct]

    # Protection against REAPER bugs 2 (maybe it needs a better protection):
    # marks that land on or after the last sample of the signal.
    if check_len_smpls > 0:
        last_valid_smpl = check_len_smpls - 1
        v_pm_smpls = lu.round_to_int(v_pm_sec * fs)
        if v_pm_smpls[-1] >= last_valid_smpl:
            vb_in_range = v_pm_smpls < last_valid_smpl
            v_pm_sec = v_pm_sec[vb_in_range]
            v_voi = v_voi[vb_in_range]

    return v_pm_sec, v_voi
Ejemplo n.º 5
0
def gen_wider_window(func_win, len_l, len_r, flat_to_len_ratio):
    '''
    Builds a window made of a rising fade, a flat top of ones and a falling fade.

    func_win:          Window generator called as func_win(n) (e.g. np.hanning).
    len_l, len_r:      Lengths of the left and right sides; rounded to integers.
    flat_to_len_ratio: Fraction of each side that belongs to the flat top;
                       the remaining fraction of each side is the fade.

    Returns: v_win, 1D array of len_l + len_r samples (after rounding).
    '''
    fade_ratio = 1 - flat_to_len_ratio

    n_left = lu.round_to_int(len_l)
    n_right = lu.round_to_int(len_r)

    n_fade_left = lu.round_to_int(fade_ratio * n_left)
    n_fade_right = lu.round_to_int(fade_ratio * n_right)

    # Rising edge: samples before the centre of an odd-length window.
    v_rise = func_win(2 * n_fade_left + 1)[:n_fade_left]

    # Falling edge: samples after the centre of an odd-length window.
    v_fall = func_win(2 * n_fade_right + 1)[n_fade_right + 1:]

    # Whatever is not used by the two fades stays at one.
    n_flat = (n_left + n_right) - (n_fade_left + n_fade_right)

    return np.hstack((v_rise, np.ones(n_flat), v_fall))
Ejemplo n.º 6
0
def apply_fbank(m_mag,
                v_bins_warp,
                nbands,
                win_func=np.hanning,
                mode='average'):
    '''
    Applies a filter bank along the frequency axis of m_mag.

    m_mag:       2D array (nfrms x nbins).
    v_bins_warp: Mapping from input bins to output (monotonically increasing
                 from 0 to any positive number).
                 Requirement: length = m_mag.shape[1]. If wanted, use
                 build_mel_curve(...) to construct it.
    nbands:      Number of output bands.
    win_func:    Window generator given to gen_non_symmetric_win().
    mode:        'average': each band is the dot product of the frame with the
                            band's (normalised) window.
                 'maxabs':  each band takes the input value at the bin where
                            the windowed magnitude is largest.

    Returns: (m_mag_mel, v_winlen): the (nfrms x nbands) band values and the
             length in bins of each band's window.
    Raises:  ValueError if mode is not one of the two supported values.
    '''
    # Fail early and clearly; otherwise an unknown mode would only surface as
    # an UnboundLocalError on the return statement.
    if mode not in ('average', 'maxabs'):
        raise ValueError(
            "Unknown mode '%s': expected 'average' or 'maxabs'." % (mode,))

    nfrms, nbins = m_mag.shape

    # Bands gen:
    maxval = v_bins_warp[-1]
    v_cntrs_mel = np.linspace(0, maxval, nbands)

    # To linear frequency:
    f_interp = interpolate.interp1d(v_bins_warp,
                                    np.arange(nbins),
                                    kind='quadratic')
    v_cntrs = lu.round_to_int(f_interp(v_cntrs_mel))

    # Build filter bank:
    m_fbank = np.zeros((nbins, nbands))
    # First and last centres are repeated so that the edge bands get a
    # zero-length outer side.
    v_cntrs_ext = np.r_[v_cntrs[0], v_cntrs, v_cntrs[-1]]
    v_winlen = np.zeros(nbands)
    for nxb in range(1, nbands + 1):
        winlen_l = v_cntrs_ext[nxb] - v_cntrs_ext[nxb - 1]
        winlen_r = v_cntrs_ext[nxb + 1] - v_cntrs_ext[nxb]
        v_win = gen_non_symmetric_win(winlen_l,
                                      winlen_r,
                                      win_func=win_func,
                                      b_norm=True)
        winlen = v_win.size
        v_winlen[nxb - 1] = winlen
        # The window starts at the previous band's centre:
        nx_strt = v_cntrs_ext[nxb - 1]
        m_fbank[nx_strt:(nx_strt + winlen), nxb - 1] = v_win

    # Apply filterbank:
    if mode == 'average':
        m_mag_mel = np.dot(m_mag, m_fbank)
    else:  # mode == 'maxabs'
        m_mag_mel = np.zeros((nfrms, nbands))
        for nxf in range(nfrms):
            v_mag = m_mag[nxf, :]
            m_filtered = v_mag[:, None] * m_fbank
            v_nx_max = np.argmax(np.abs(m_filtered), axis=0)
            m_mag_mel[nxf, :] = v_mag[v_nx_max]

    return m_mag_mel, v_winlen
Ejemplo n.º 7
0
def spectral_crossfade(m_sp_l, m_sp_r, cut_off, bw, fs, freq_scale='hz'):
    '''
    Blends two spectra across a transition band in frequency.

    Below the band the result equals m_sp_l, above it equals m_sp_r; within
    the band the inputs are weighted by complementary Hann halves whose sum
    is one at every bin.

    m_sp_l, m_sp_r: 2D arrays with frequency along axis 1 (presumably
                    nFFT/2 + 1 bins; equal sizes assumed - TODO confirm).
    cut_off:        Centre of the transition band (converted by hz_to_bin()).
    bw:             Width of the transition band, same unit as cut_off.
    fs:             Sampling rate given to hz_to_bin().
    freq_scale:     Ignored by this function; kept for compatibility.

    Returns: m_sp, the blended spectrum.
    '''

    # Hz to bin:
    nFFThalf = m_sp_l.shape[1]
    nFFT     = (nFFThalf - 1) * 2
    # bw/2.0: true division even when bw is an integer (Python 2).
    bin_l    = lu.round_to_int(hz_to_bin(cut_off - bw/2.0, nFFT, fs))
    bin_r    = lu.round_to_int(hz_to_bin(cut_off + bw/2.0, nFFT, fs))

    # Gen short windows:
    bw_bin = bin_r - bin_l
    if bw_bin == 0:
        # A one-bin band would otherwise use np.hanning(1) == [1.] on both
        # sides, i.e. the two spectra would be added at the crossover bin.
        v_win_shrt_l = np.array([0.5])
        v_win_shrt_r = np.array([0.5])
    else:
        v_win_shrt   = np.hanning(2*bw_bin + 1)
        v_win_shrt_l = v_win_shrt[bw_bin:]
        v_win_shrt_r = v_win_shrt[:bw_bin+1]

    # Gen long windows:
    v_win_l = np.hstack((np.ones(bin_l),  v_win_shrt_l, np.zeros(nFFThalf - bin_r - 1)))
    v_win_r = np.hstack((np.zeros(bin_l), v_win_shrt_r, np.ones(nFFThalf - bin_r - 1)))

    # Apply windows:
    m_sp_l_win = m_sp_l * v_win_l[None,:]
    m_sp_r_win = m_sp_r * v_win_r[None,:]
    m_sp       = m_sp_l_win + m_sp_r_win

    return m_sp
Ejemplo n.º 8
0
def rceps_spectral_smoothing(m_sp,
                             in_type='splog',
                             nc_total=60,
                             fade_to_total=0.2):
    '''
    Smooths spectra by truncating (liftering) their cepstrum.

    The cepstrum returned by rceps() is converted with rceps_to_min_phase(),
    only the first nc_total coefficients are kept (the last ones faded out
    with half a Hann window), the per-frame RMS of the cepstrum is restored
    and the result is transformed back with an FFT.

    m_sp:          Input spectra, forwarded to rceps().
    in_type:       Forwarded to rceps().
    nc_total:      Number of cepstral coefficients kept.
    fade_to_total: Fraction of nc_total used for the fade-out.

    Returns: m_sp_sm, the real part of the FFT of the liftered cepstrum with
             the hermitian half removed (presumably a log spectrum - TODO
             confirm against rceps()).
    '''

    def _rms_per_frame(m_ceps):
        return np.sqrt(np.mean(m_ceps**2, axis=1))

    n_fade = lu.round_to_int(fade_to_total * nc_total)

    # Cepstrum and its RMS before liftering:
    m_ceps_full = rceps(m_sp, in_type=in_type)
    m_ceps = rceps_to_min_phase(m_ceps_full)
    v_rms_before = _rms_per_frame(m_ceps)

    # Falling half of a Hann window, without its peak and without the final
    # zero: n_fade samples.
    v_fade = np.hanning(2 * n_fade + 3)[n_fade + 2:-1]

    # Lifter: discard everything from nc_total on, fade the last kept ones.
    fade_strt = nc_total - n_fade
    m_ceps[:, nc_total:] = 0
    m_ceps[:, fade_strt:nc_total] *= v_fade

    # Bring every frame back to its original RMS:
    v_gain = v_rms_before / _rms_per_frame(m_ceps)
    m_ceps = m_ceps * v_gain[:, None]

    # Back to the spectral domain:
    n_fft = m_ceps_full.shape[1]
    m_spec_full = np.fft.fft(m_ceps, n=n_fft).real

    return remove_hermitian_half(m_spec_full)
Ejemplo n.º 9
0
def spectral_smoothing_rceps(m_sp_log, nc_total=60, fade_to_total=0.2):
    '''
    Smooths log spectra by keeping only their first cepstral coefficients.

    m_sp_log:      Log spectra; could be in any base log or decibels.
    nc_total:      Number of cepstral coefficients kept.
    fade_to_total: Fraction of nc_total that is faded out with half a Hann
                   window instead of being kept untouched.

    Returns: m_sp_log_sm, the smoothed spectra, in the same scale as the input.
    '''

    n_fade = lu.round_to_int(fade_to_total * nc_total)

    # Full (two-sided) spectrum -> real cepstrum:
    m_ceps_full = np.fft.ifft(add_hermitian_half(m_sp_log)).real
    m_ceps = rceps_to_min_phase_rceps(m_ceps_full)

    # Falling half of a Hann window (peak and trailing zero excluded),
    # n_fade samples long:
    v_fade = np.hanning(2 * n_fade + 3)[n_fade + 2:-1]

    # Liftering: zero from nc_total onwards, fade the last n_fade kept ones.
    fade_strt = nc_total - n_fade
    m_ceps[:, nc_total:] = 0
    m_ceps[:, fade_strt:nc_total] *= v_fade

    # Cepstrum -> spectrum, same FFT length as the cepstrum:
    n_fft = m_ceps_full.shape[1]
    m_spec_full = np.fft.fft(m_ceps, n=n_fft).real

    return remove_hermitian_half(m_spec_full)
Ejemplo n.º 10
0
def spectral_smoothing_rceps(m_sp_log, nc_total=60, fade_to_total=0.2):
    '''
    Cepstral smoothing of log spectra.

    The spectra are taken to the cepstral domain, every coefficient from
    nc_total onwards is set to zero, the last coefficients before that point
    are tapered with half a Hann window, and the result is taken back to the
    spectral domain.

    m_sp_log:      Input log spectra (any base log or decibels).
    nc_total:      Number of cepstral coefficients that survive.
    fade_to_total: Share of nc_total covered by the taper.

    Returns: m_sp_log_sm, the smoothed log spectra.
    '''

    nc_taper = lu.round_to_int(fade_to_total * nc_total)

    # Spectrum with its mirrored half, then the real cepstrum:
    m_sp_log_full = add_hermitian_half(m_sp_log)
    m_rceps_full  = np.fft.ifft(m_sp_log_full).real
    m_rceps_lift  = rceps_to_min_phase_rceps(m_rceps_full)

    # Taper: decaying side of a Hann window, dropping the peak and the last
    # (zero) sample, which leaves nc_taper values.
    v_hann  = np.hanning(2*nc_taper + 3)
    v_taper = v_hann[nc_taper+2:-1]

    # Truncate and taper in place:
    m_rceps_lift[:,nc_total:] = 0
    m_rceps_lift[:,nc_total-nc_taper:nc_total] *= v_taper

    # Return to the spectral domain and drop the mirrored half:
    fft_len    = m_rceps_full.shape[1]
    m_smoothed = np.fft.fft(m_rceps_lift, n=fft_len).real
    m_smoothed = remove_hermitian_half(m_smoothed)

    return m_smoothed
Ejemplo n.º 11
0
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes, fft_len):
    '''
    Interpolates between two utterances over nframes frames, using the formant
    layout of two anchor frames. NOTE: marked as NOT FINISHED by its author.

    Anchor A is frame nx_strt_a of wavfile_a; anchor B is the last frame of
    the interpolated region in wavfile_b (nx_strt_b + nframes - 1). For every
    interpolated frame the envelopes of A and B are warped towards each other
    and mixed, and the anchors' short frames are filtered and mixed in time.

    wavfile_a, wavfile_b: Audio files to join.
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in each file.
    nframes:              Number of interpolated frames. Must be > 1, since
                          the mixing weight divides by (nframes - 1).
    fft_len:              FFT length used for all the spectra.

    Returns: (v_sig_merged, fs): frames of A before nx_strt_a, the
             interpolated frames and the frames of B from nx_strt_b + nframes
             on, synthesised into one signal, plus the sampling rate read
             from wavfile_b.
    '''

    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Get formants (anchor frames):
    (v_mag_db_a_dummy, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a,
     v_frmnts_bw_a, v_frm_short_a, shift_a) = get_formant_locations_from_raw_long_frame(
         v_sig_a, v_pm_a, nx_strt_a, fft_len)
    # The anchor of B has to be indexed with B's own start frame (this used to
    # be nx_strt_a + nframes, i.e. an index of A applied to B's pitch marks).
    nx_anchor_b = nx_strt_b + nframes - 1
    (v_mag_db_b_dummy, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b,
     v_frmnts_bw_b, v_frm_short_b, shift_b) = get_formant_locations_from_raw_long_frame(
         v_sig_b, v_pm_b, nx_anchor_b, fft_len)

    # Formant mapping:----------------------------------------------------------------
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(v_frmnts_bins_a, v_frmnts_gains_db_a, v_frmnts_bins_b, v_frmnts_gains_db_b, fft_len)

    v_shifts_syn = np.zeros(nframes, dtype='int')
    m_frms_syn   = np.zeros((nframes, fft_len))

    # range() instead of xrange(), so this also runs under Python 3:
    for nx_frm in range(nframes):

        # 0.0 at the first interpolated frame (all A), 1.0 at the last (all B):
        sp_weight = nx_frm / (nframes-1.0)
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Computing mag spectrum:
        v_mag_db_a = get_formant_locations_from_raw_long_frame(v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_b = get_formant_locations_from_raw_long_frame(v_sig_b, v_pm_b, nx_b, fft_len)[0]

        # NOT FINISHED !!
        # Warping:---------------------------------------------------------------------

        # True envelope:
        v_true_env_db_a = la.true_envelope(v_mag_db_a[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(v_mag_db_b[None,:], in_type='db', ncoeffs=400, thres_db=0.1)[0]

        v_sp_env_db_a_warp = warp_mag_spec(v_true_env_db_a, v_frmnts_bins_a_filt, v_frmnts_bins_b_filt, fft_len, sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(v_true_env_db_b, v_frmnts_bins_b_filt, v_frmnts_bins_a_filt, fft_len, (1-sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_targ = v_sp_env_db_a_warp * (1.0-sp_weight) + v_sp_env_db_b_warp * sp_weight

        # Source mix:------------------------------------------------------------------
        # Gain (in dB) that takes each envelope to the target envelope:
        v_spec_diff_db_a = v_sp_env_db_targ - v_true_env_db_a
        v_spec_diff_db_b = v_sp_env_db_targ - v_true_env_db_b

        # Filtering (FFT filter). The short frames are those of the anchors:
        v_frm_short_a_ext_filt = fft_filter(v_frm_short_a, shift_a, v_spec_diff_db_a, fft_len)
        v_frm_short_b_ext_filt = fft_filter(v_frm_short_b, shift_b, v_spec_diff_db_b, fft_len)

        # Mix signal:
        v_frm_short_ext_filt = v_frm_short_a_ext_filt * (1.0-sp_weight) + v_frm_short_b_ext_filt * sp_weight

        # Mix shifts:
        shift_mix = lu.round_to_int(shift_a * (1.0-sp_weight) + shift_b * sp_weight)

        # Save:
        v_shifts_syn[nx_frm] = shift_mix
        m_frms_syn[nx_frm, :] = v_frm_short_ext_filt

    # Merge:
    m_frms_syn_dc = np.fft.fftshift(m_frms_syn,  axes=1)
    m_fft_syn     = la.remove_hermitian_half(np.fft.fft(m_frms_syn_dc))
    m_mag_syn, m_real_syn, m_imag_syn = compute_lossless_spec_feats(m_fft_syn)

    nx_rest_b = nx_strt_b + nframes  # first frame of B after the interpolated region
    m_mag_merged   = np.vstack((m_mag_a[:nx_strt_a,:] , m_mag_syn , m_mag_b[nx_rest_b:,:]))
    m_real_merged  = np.vstack((m_real_a[:nx_strt_a,:] , m_real_syn , m_real_b[nx_rest_b:,:]))
    m_imag_merged  = np.vstack((m_imag_a[:nx_strt_a,:] , m_imag_syn , m_imag_b[nx_rest_b:,:]))
    v_shift_merged = np.r_[ v_shift_a[:nx_strt_a] , v_shifts_syn , v_shift_b[nx_rest_b:] ]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged, m_imag_merged, v_shift_merged)

    return v_sig_merged, fs
Ejemplo n.º 12
0
def get_formant_locations_from_raw_long_frame(wavfile, nx, fft_len):
    '''
    Estimates the formants of one pitch-synchronous frame from its LPC
    envelope.

    wavfile: Audio file to analyse.
    nx:      Frame index (index of the central pitch mark). The pitch marks
             nx - 2 ... nx + 2 are accessed, so they must exist.
    fft_len: FFT length used for the spectra.

    Returns a tuple with:
        v_mag_db:          Magnitude spectrum (dB) of the long frame, which
                           spans the pitch marks nx - 2 to nx + 2.
        v_lpc_mag_db:      LPC envelope (dB), shifted to the mean of v_mag_db.
        v_frmnts_bins:     Bins of the formants whose bandwidth measure is
                           below the threshold.
        v_frmnts_gains_db: Gains (dB) of those formants.
        v_frmnts_bw:       Bandwidth measure of those formants.
        v_frm_short_win:   Windowed short frame (pitch marks nx - 1 to nx + 1).
        shift:             Samples between pitch marks nx - 1 and nx.
    '''

    v_sig, fs = la.read_audio_file(wavfile)

    # Epoch detection:
    v_pm_sec, v_voi = la.reaper_epoch_detection(wavfile)
    v_pm = lu.round_to_int(v_pm_sec * fs)

    # Raw-long Frame extraction:
    v_frm_long = v_sig[v_pm[nx - 2]:v_pm[nx + 2] + 1]

    # Win:
    left_len = v_pm[nx] - v_pm[nx - 2]
    right_len = v_pm[nx + 2] - v_pm[nx]
    v_win = la.gen_non_symmetric_win(left_len,
                                     right_len,
                                     np.hanning,
                                     b_norm=False)
    v_frm_long_win = v_frm_long * v_win

    # Spectrum:
    v_mag = np.absolute(np.fft.fft(v_frm_long_win, n=fft_len))
    v_mag_db = la.db(la.remove_hermitian_half(v_mag[None, :])[0])

    # Formant extraction -LPC method:--------------------------------------------------
    v_lpc, v_e, v_refl = lpc(v_frm_long_win, 120)

    v_lpc_mag = lpc_to_mag(v_lpc, fft_len=fft_len)
    v_lpc_mag_db = la.db(v_lpc_mag)
    # Bring the LPC envelope to the same mean level as the spectrum:
    v_lpc_mag_db = v_lpc_mag_db - np.mean(v_lpc_mag_db) + np.mean(v_mag_db)

    v_frmnts_bins, v_frmnts_gains_db = get_formant_locations_from_spec_env(
        v_lpc_mag_db)

    # Getting bandwidth:
    # Floor division keeps this an integer under Python 3 as well.
    fft_len_half = 1 + fft_len // 2
    # Valleys = peaks of the negated envelope, plus both ends of the spectrum:
    v_vall_bins = get_formant_locations_from_spec_env(-v_lpc_mag_db)[0]
    v_vall_bins = np.r_[0, v_vall_bins, fft_len_half - 1]

    nfrmnts = v_frmnts_bins.size
    v_frmnts_bw = np.zeros(nfrmnts) - 1.0
    for nx_f in range(nfrmnts):
        curr_frmnt_bin = v_frmnts_bins[nx_f]
        curr_vall_l_bin = v_vall_bins[nx_f]
        curr_vall_r_bin = v_vall_bins[nx_f + 1]

        # Points half way between the formant and each neighbouring valley:
        curr_midp_l = int((curr_frmnt_bin + curr_vall_l_bin) / 2.0)
        curr_midp_r = int((curr_frmnt_bin + curr_vall_r_bin) / 2.0)

        # Slopes (dB per bin) from those points up to the formant peak:
        slope_l = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_l]) / (
            v_frmnts_bins[nx_f] - curr_midp_l).astype(float)
        slope_r = (v_frmnts_gains_db[nx_f] - v_lpc_mag_db[curr_midp_r]) / (
            v_frmnts_bins[nx_f] - curr_midp_r).astype(float)

        slope_ave = (slope_l - slope_r) / 2.0

        # The steeper the peak, the smaller the bandwidth measure:
        v_frmnts_bw[nx_f] = 1.0 / slope_ave

    # Filtering by bandwidth:
    bw_thress = 7.0
    vb_keep = v_frmnts_bw < bw_thress
    v_frmnts_bins = v_frmnts_bins[vb_keep]
    v_frmnts_gains_db = v_frmnts_gains_db[vb_keep]
    v_frmnts_bw = v_frmnts_bw[vb_keep]

    # Computing frame short:--------------------------------
    # Win:
    left_len_short = v_pm[nx] - v_pm[nx - 1]
    right_len_short = v_pm[nx + 1] - v_pm[nx]
    v_win_short = la.gen_non_symmetric_win(left_len_short,
                                           right_len_short,
                                           np.hanning,
                                           b_norm=False)
    v_frm_short = v_sig[v_pm[nx - 1]:v_pm[nx + 1] + 1]
    v_frm_short_win = v_frm_short * v_win_short
    shift = v_pm[nx] - v_pm[nx - 1]

    return v_mag_db, v_lpc_mag_db, v_frmnts_bins, v_frmnts_gains_db, v_frmnts_bw, v_frm_short_win, shift
Ejemplo n.º 13
0
def speech_interp_with_anchors(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b,
                               nframes, fft_len):
    '''
    Interpolates between two utterances over nframes frames, using the
    spectral envelopes and formants of two anchor frames.

    Anchor A is frame nx_strt_a of wavfile_a and anchor B is frame
    nx_strt_b + nframes - 1 of wavfile_b. For every interpolated frame the
    anchor envelopes are warped towards each other and mixed into a target
    envelope; the current frames of A and B are whitened with their own
    envelopes, shaped with the target and mixed.

    wavfile_a, wavfile_b: Audio files to join.
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in each file.
    nframes:              Number of interpolated frames. Must be > 1, since
                          the mixing weight divides by (nframes - 1).
    fft_len:              FFT length; the features hold 1 + fft_len // 2 bins.

    Returns: (v_sig_merged, fs): frames of A before nx_strt_a, the
             interpolated frames and the frames of B from nx_strt_b + nframes
             on, synthesised into one signal, plus the sampling rate read
             from wavfile_b.
    '''

    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(
        wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(
        wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Get formants (anchor frames):
    (v_mag_db_a, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a,
     v_frmnts_bw_a, v_frm_short_a,
     shift_a) = get_formant_locations_from_raw_long_frame(
         v_sig_a, v_pm_a, nx_strt_a, fft_len)
    (v_mag_db_b, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b,
     v_frmnts_bw_b, v_frm_short_b,
     shift_b) = get_formant_locations_from_raw_long_frame(
         v_sig_b, v_pm_b, nx_strt_b + nframes - 1, fft_len)

    # Formant mapping:----------------------------------------------------------------
    v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
        v_frmnts_bins_a, v_frmnts_gains_db_a, v_frmnts_bins_b,
        v_frmnts_gains_db_b, fft_len)

    # spec envelope anchors:---------------------------------------------------------------------
    v_true_env_db_a = la.true_envelope(v_mag_db_a[None, :],
                                       in_type='db',
                                       ncoeffs=400,
                                       thres_db=0.1)[0]
    v_true_env_db_b = la.true_envelope(v_mag_db_b[None, :],
                                       in_type='db',
                                       ncoeffs=400,
                                       thres_db=0.1)[0]

    # Floor division: np.zeros needs an integer size (plain '/' yields a
    # float under Python 3).
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    # range() instead of xrange(), so this also runs under Python 3:
    for nx_frm in range(nframes):

        # 0.0 at the first interpolated frame (all A), 1.0 at the last (all B):
        sp_weight = nx_frm / (nframes - 1.0)
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Spectral Warping (of the anchor envelopes):
        v_sp_env_db_curr_a_warp = warp_mag_spec(v_true_env_db_a,
                                                v_frmnts_bins_a_filt,
                                                v_frmnts_bins_b_filt, fft_len,
                                                sp_weight)
        v_sp_env_db_curr_b_warp = warp_mag_spec(v_true_env_db_b,
                                                v_frmnts_bins_b_filt,
                                                v_frmnts_bins_a_filt, fft_len,
                                                (1 - sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_curr_targ = v_sp_env_db_curr_a_warp * (
            1.0 - sp_weight) + v_sp_env_db_curr_b_warp * sp_weight

        # Whitening:-----------------------------------------------------------------------------
        # Spectral envelope estimation (of the current frames):
        v_mag_db_curr_a = get_formant_locations_from_raw_long_frame(
            v_sig_a, v_pm_a, nx_a, fft_len)[0]
        v_mag_db_curr_b = get_formant_locations_from_raw_long_frame(
            v_sig_b, v_pm_b, nx_b, fft_len)[0]
        v_true_env_db_curr_a = la.true_envelope(v_mag_db_curr_a[None, :],
                                                in_type='db',
                                                ncoeffs=400,
                                                thres_db=0.1)[0]
        v_true_env_db_curr_b = la.true_envelope(v_mag_db_curr_b[None, :],
                                                in_type='db',
                                                ncoeffs=400,
                                                thres_db=0.1)[0]

        # la.db(..., b_inv=True) presumably converts dB back to linear
        # magnitude - TODO confirm.
        v_mag_white_a = m_mag_a[nx_a, :] / la.db(v_true_env_db_curr_a,
                                                 b_inv=True)
        v_mag_white_b = m_mag_b[nx_b, :] / la.db(v_true_env_db_curr_b,
                                                 b_inv=True)

        # Impose spectral Env:------------------------------------------------------------------
        v_sp_env_targ = la.db(v_sp_env_db_curr_targ, b_inv=True)
        v_mag_filt_a = v_mag_white_a * v_sp_env_targ
        v_mag_filt_b = v_mag_white_b * v_sp_env_targ

        # Mix Sources:------------------------------------------------------------------
        v_mag_mix = v_mag_filt_a * (1.0 - sp_weight) + v_mag_filt_b * sp_weight
        v_real_mix = m_real_a[nx_a, :] * (
            1.0 - sp_weight) + m_real_b[nx_b, :] * sp_weight
        v_imag_mix = m_imag_a[nx_a, :] * (
            1.0 - sp_weight) + m_imag_b[nx_b, :] * sp_weight

        # Mix shifts (those of the anchor frames):
        shift_mix = lu.round_to_int(shift_a * (1.0 - sp_weight) +
                                    shift_b * sp_weight)

        # Save:
        v_shifts_interp[nx_frm] = shift_mix
        m_mag_interp[nx_frm, :] = v_mag_mix
        m_real_interp[nx_frm, :] = v_real_mix
        m_imag_interp[nx_frm, :] = v_imag_mix

    # Merge:
    nx_rest_b = nx_strt_b + nframes  # first frame of B after the interpolated region
    m_mag_merged = np.vstack((m_mag_a[:nx_strt_a, :], m_mag_interp,
                              m_mag_b[nx_rest_b:, :]))
    m_real_merged = np.vstack((m_real_a[:nx_strt_a, :], m_real_interp,
                               m_real_b[nx_rest_b:, :]))
    m_imag_merged = np.vstack((m_imag_a[:nx_strt_a, :], m_imag_interp,
                               m_imag_b[nx_rest_b:, :]))
    v_shift_merged = np.r_[v_shift_a[:nx_strt_a], v_shifts_interp,
                           v_shift_b[nx_rest_b:]]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged,
                                           m_imag_merged, v_shift_merged)
    return v_sig_merged, fs
Ejemplo n.º 14
0
def speech_interp(wavfile_a, wavfile_b, nx_strt_a, nx_strt_b, nframes,
                  fft_len):
    '''
    Interpolates between two utterances over nframes frames, re-estimating
    the formants and spectral envelopes at every frame.

    For each interpolated frame the envelopes of the current frames of A and
    B are warped towards each other according to their formant mapping and
    mixed into a target envelope; the magnitudes of both frames are then
    corrected towards that target and mixed, together with the real and
    imaginary features and the frame shifts.

    wavfile_a, wavfile_b: Audio files to join.
    nx_strt_a, nx_strt_b: Index of the first interpolated frame in each file.
    nframes:              Number of interpolated frames. Must be > 1, since
                          the mixing weight divides by (nframes - 1).
    fft_len:              FFT length; the features hold 1 + fft_len // 2 bins.

    Returns: (v_sig_merged, fs): frames of A before nx_strt_a, the
             interpolated frames and the frames of B from nx_strt_b + nframes
             on, synthesised into one signal, plus the sampling rate read
             from wavfile_b.
    '''

    # MagPhase analysis:
    m_mag_a, m_real_a, m_imag_a, v_f0_a, fs, v_shift_a = mp.analysis_lossless(
        wavfile_a)
    m_mag_b, m_real_b, m_imag_b, v_f0_b, fs, v_shift_b = mp.analysis_lossless(
        wavfile_b)

    v_pm_a = la.shift_to_pm(v_shift_a)
    v_pm_b = la.shift_to_pm(v_shift_b)

    v_sig_a, fs = la.read_audio_file(wavfile_a)
    v_sig_b, fs = la.read_audio_file(wavfile_b)

    # Floor division: np.zeros needs an integer size (plain '/' yields a
    # float under Python 3).
    fft_len_half = 1 + fft_len // 2
    m_mag_interp = np.zeros((nframes, fft_len_half))
    m_real_interp = np.zeros((nframes, fft_len_half))
    m_imag_interp = np.zeros((nframes, fft_len_half))
    v_shifts_interp = np.zeros(nframes, dtype='int')

    # range() instead of xrange(), so this also runs under Python 3:
    for nx_frm in range(nframes):

        # 0.0 at the first interpolated frame (all A), 1.0 at the last (all B):
        sp_weight = nx_frm / (nframes - 1.0)
        nx_a = nx_strt_a + nx_frm
        nx_b = nx_strt_b + nx_frm

        # Get formants (of the current frames):
        (v_mag_db_a, v_lpc_mag_db_a, v_frmnts_bins_a, v_frmnts_gains_db_a,
         v_frmnts_bw_a, v_frm_short_a,
         shift_a) = get_formant_locations_from_raw_long_frame(
             v_sig_a, v_pm_a, nx_a, fft_len)
        (v_mag_db_b, v_lpc_mag_db_b, v_frmnts_bins_b, v_frmnts_gains_db_b,
         v_frmnts_bw_b, v_frm_short_b,
         shift_b) = get_formant_locations_from_raw_long_frame(
             v_sig_b, v_pm_b, nx_b, fft_len)

        # Formant mapping:----------------------------------------------------------------
        v_frmnts_bins_a_filt, v_frmnts_bins_b_filt = formant_mapping(
            v_frmnts_bins_a, v_frmnts_gains_db_a, v_frmnts_bins_b,
            v_frmnts_gains_db_b, fft_len)

        # Warping:---------------------------------------------------------------------

        # True envelope:
        v_true_env_db_a = la.true_envelope(v_mag_db_a[None, :],
                                           in_type='db',
                                           ncoeffs=400,
                                           thres_db=0.1)[0]
        v_true_env_db_b = la.true_envelope(v_mag_db_b[None, :],
                                           in_type='db',
                                           ncoeffs=400,
                                           thres_db=0.1)[0]

        v_sp_env_db_a_warp = warp_mag_spec(v_true_env_db_a,
                                           v_frmnts_bins_a_filt,
                                           v_frmnts_bins_b_filt, fft_len,
                                           sp_weight)
        v_sp_env_db_b_warp = warp_mag_spec(v_true_env_db_b,
                                           v_frmnts_bins_b_filt,
                                           v_frmnts_bins_a_filt, fft_len,
                                           (1 - sp_weight))

        # Spectral envelope mix:-------------------------------------------------------
        v_sp_env_db_targ = v_sp_env_db_a_warp * (
            1.0 - sp_weight) + v_sp_env_db_b_warp * sp_weight

        # Impose spectral Env:------------------------------------------------------
        # Gain (in dB) that takes each envelope to the target envelope:
        v_sp_env_diff_db_a = v_sp_env_db_targ - v_true_env_db_a
        v_sp_env_diff_db_b = v_sp_env_db_targ - v_true_env_db_b

        # la.db(..., b_inv=True) presumably converts dB back to a linear
        # gain - TODO confirm.
        v_mag_filt_a = m_mag_a[nx_a, :] * la.db(v_sp_env_diff_db_a, b_inv=True)
        v_mag_filt_b = m_mag_b[nx_b, :] * la.db(v_sp_env_diff_db_b, b_inv=True)

        # Mix Sources:------------------------------------------------------------------
        v_mag_mix = v_mag_filt_a * (1.0 - sp_weight) + v_mag_filt_b * sp_weight
        v_real_mix = m_real_a[nx_a, :] * (
            1.0 - sp_weight) + m_real_b[nx_b, :] * sp_weight
        v_imag_mix = m_imag_a[nx_a, :] * (
            1.0 - sp_weight) + m_imag_b[nx_b, :] * sp_weight

        # Mix shifts:
        shift_mix = lu.round_to_int(shift_a * (1.0 - sp_weight) +
                                    shift_b * sp_weight)

        # Save:
        v_shifts_interp[nx_frm] = shift_mix
        m_mag_interp[nx_frm, :] = v_mag_mix
        m_real_interp[nx_frm, :] = v_real_mix
        m_imag_interp[nx_frm, :] = v_imag_mix

    # Merge:
    nx_rest_b = nx_strt_b + nframes  # first frame of B after the interpolated region
    m_mag_merged = np.vstack((m_mag_a[:nx_strt_a, :], m_mag_interp,
                              m_mag_b[nx_rest_b:, :]))
    m_real_merged = np.vstack((m_real_a[:nx_strt_a, :], m_real_interp,
                               m_real_b[nx_rest_b:, :]))
    m_imag_merged = np.vstack((m_imag_a[:nx_strt_a, :], m_imag_interp,
                               m_imag_b[nx_rest_b:, :]))
    v_shift_merged = np.r_[v_shift_a[:nx_strt_a], v_shifts_interp,
                           v_shift_b[nx_rest_b:]]

    v_sig_merged = synthesis_from_lossless(m_mag_merged, m_real_merged,
                                           m_imag_merged, v_shift_merged)

    return v_sig_merged, fs