Ejemplo n.º 1
0
def get_feat(wav_list_prefix, wav_path, feat_path, task, fftsize=256, hopsize=64):
    """Extract STFT features and trimmed waveforms for every utterance of a task.

    The list file ``<wav_list_prefix>_<task>_mix`` is read line by line; each
    line names one utterance.  For every utterance the ``s1``, ``s2`` and
    ``mix`` wav files under ``wav_path + task + '/'`` are loaded, transformed
    with ``stft`` and written below ``feat_path + '/' + task + '/'``:

    * ``<name>.npy``      -- stack of real/imag STFT planes for s1, s2 and mix
      (six float32 planes stacked on axis 0).
    * ``<name>_wave.npy`` -- the three time-domain signals, cut to a common
      length derived from ``fftsize`` and ``hopsize``.

    Args:
        wav_list_prefix: Prefix of the utterance list file.
        wav_path: Directory prefix holding the per-task wav folders.
        feat_path: Directory prefix for the generated ``.npy`` files.
        task: Sub-directory / list name (used verbatim in paths).
        fftsize: FFT size; only the first ``fftsize // 2 + 1`` STFT columns
            are kept.
        hopsize: Hop size used when computing the stored waveform length.
    """
    wav_folders = wav_path + task + '/'
    wav_list = wav_list_prefix + '_' + task + '_mix'
    output_dir = feat_path + '/' + task + '/'
    # Floor division keeps the bin count an int; ``fftsize/2+1`` is a float on
    # Python 3 and cannot be used as a slice bound.
    n_bins = int(fftsize // 2 + 1)

    def load(subdir, name):
        """Read one wav file and scale it by 2**15."""
        _, samples = wav_read(wav_folders + subdir + '/' + name + '.wav')
        return samples.astype('float32') / np.power(2, 15)

    with open(wav_list, 'r') as f:
        for file, line in enumerate(f):
            print(task + ' file: ' + str(file + 1))
            # Load wav files
            line = line.split('\n')[0]
            clean_audio_1 = load('s1', line)
            clean_audio_2 = load('s2', line)
            mix_audio = load('mix', line)
            # STFT, keeping only the first n_bins columns
            Zxx_1 = stft(clean_audio_1)[:, 0:n_bins]
            Zxx_2 = stft(clean_audio_2)[:, 0:n_bins]
            Zxx_mix = stft(mix_audio)[:, 0:n_bins]
            # Store real and imaginary STFT of speaker1, speaker2 and mixture
            planes = []
            for spec in (Zxx_1, Zxx_2, Zxx_mix):
                planes.append(np.real(spec).astype('float32'))
                planes.append(np.imag(spec).astype('float32'))
            Zxx = np.stack(planes, axis=0)
            # Save features and targets to npy files
            np.save(output_dir + line, Zxx)
            # Save time-domain waveform to npy file, cut to the end of the last
            # full frame of speaker 1
            audio_len = range(0, len(clean_audio_1) - fftsize + 1, hopsize)[-1] + fftsize
            audio = np.stack((clean_audio_1[:audio_len],
                              clean_audio_2[:audio_len],
                              mix_audio[:audio_len]), axis=0)
            np.save(output_dir + line + '_wave', audio)
def comparePlot(signal1, signal2, Fs, fft_size=512, norm=False, equal=False, title1=None, title2=None):
    """Plot two signals side by side in time and as spectrograms.

    A 2x2 figure is drawn: the top row shows the waveforms, the bottom row the
    spectrograms produced by ``stft.spectroplot``.  Both spectrograms share one
    colour scale taken from the 1st and 99.5th percentiles of the dB values.

    Args:
        signal1, signal2: The signals to compare.
        Fs: Sampling frequency used for the time axis and the spectrograms.
        fft_size: STFT frame length; the hop is half of it.
        norm: When true the signals are normalised before plotting.
        equal: With ``norm``, normalise each signal by its own peak instead of
            by the common peak.
        title1, title2: Optional titles for the two waveform plots.

    Note:
        Normalisation uses in-place division, so with ``norm=True`` the
        caller's arrays are modified.
    """

    import matplotlib.pyplot as plt

    td_amp = np.maximum(np.abs(signal1).max(), np.abs(signal2).max())

    if norm:
        if equal:
            signal1 /= np.abs(signal1).max()
            signal2 /= np.abs(signal2).max()
        else:
            signal1 /= td_amp
            signal2 /= td_amp
        td_amp = 1.

    plt.subplot(2,2,1)
    plt.plot(np.arange(len(signal1))/float(Fs), signal1)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title1 is not None:
        plt.title(title1)

    plt.subplot(2,2,2)
    plt.plot(np.arange(len(signal2))/float(Fs), signal2)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title2 is not None:
        plt.title(title2)

    import stft
    import windows

    eps = constants.get('eps')

    # Floor division: the hop must stay an integer on Python 3, where
    # ``fft_size / 2`` would be a float.
    hop = fft_size // 2

    F1 = stft.stft(signal1, fft_size, hop, win=windows.hann(fft_size))
    F2 = stft.stft(signal2, fft_size, hop, win=windows.hann(fft_size))

    # try a fancy way to set the scale to avoid having the spectrum
    # dominated by a few outliers
    p_min = 1
    p_max = 99.5
    all_vals = np.concatenate((dB(F1+eps), dB(F2+eps))).flatten()
    vmin, vmax = np.percentile(all_vals, [p_min, p_max])

    cmap = 'jet'
    interpolation='sinc'

    plt.subplot(2,2,3)
    stft.spectroplot(F1.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
            cmap=plt.get_cmap(cmap), interpolation=interpolation)

    plt.subplot(2,2,4)
    stft.spectroplot(F2.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
            cmap=plt.get_cmap(cmap), interpolation=interpolation)
Ejemplo n.º 3
0
def comparePlot(signal1, signal2, Fs, fft_size=512, norm=False, equal=False, title1=None, title2=None):
    """Draw waveforms and spectrograms of two signals in a 2x2 grid.

    The upper panels plot each signal against time (``sample / Fs``) with a
    shared symmetric y-range; the lower panels show spectrograms rendered by
    ``stft.spectroplot`` with a shared colour range computed from the 1st and
    99.5th percentiles of the dB magnitudes of both transforms.

    Args:
        signal1, signal2: The signals to compare.
        Fs: Sampling frequency.
        fft_size: STFT frame length; half of it is used as hop size.
        norm: Normalise the signals before plotting.
        equal: With ``norm``, scale each signal by its own peak rather than
            the joint peak.
        title1, title2: Optional titles for the waveform panels.

    Note:
        The normalisation divides in place, so the arrays passed by the
        caller are modified when ``norm`` is true.
    """

    import matplotlib.pyplot as plt

    td_amp = np.maximum(np.abs(signal1).max(), np.abs(signal2).max())

    if norm:
        if equal:
            signal1 /= np.abs(signal1).max()
            signal2 /= np.abs(signal2).max()
        else:
            signal1 /= td_amp
            signal2 /= td_amp
        td_amp = 1.

    plt.subplot(2,2,1)
    plt.plot(np.arange(len(signal1))/float(Fs), signal1)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title1 is not None:
        plt.title(title1)

    plt.subplot(2,2,2)
    plt.plot(np.arange(len(signal2))/float(Fs), signal2)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title2 is not None:
        plt.title(title2)

    from constants import eps
    import stft
    import windows

    # Use floor division so the hop size is an int on Python 3 as well.
    hop = fft_size // 2

    F1 = stft.stft(signal1, fft_size, hop, win=windows.hann(fft_size))
    F2 = stft.stft(signal2, fft_size, hop, win=windows.hann(fft_size))

    # try a fancy way to set the scale to avoid having the spectrum
    # dominated by a few outliers
    p_min = 1
    p_max = 99.5
    all_vals = np.concatenate((dB(F1+eps), dB(F2+eps))).flatten()
    vmin, vmax = np.percentile(all_vals, [p_min, p_max])

    cmap = 'jet'
    interpolation='sinc'

    plt.subplot(2,2,3)
    stft.spectroplot(F1.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
            cmap=plt.get_cmap(cmap), interpolation=interpolation)

    plt.subplot(2,2,4)
    stft.spectroplot(F2.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
            cmap=plt.get_cmap(cmap), interpolation=interpolation)
Ejemplo n.º 4
0
def computeSNR(inputFile, window, M, N, H):
    """Measure the SNR between a sound and its ``stft.stft`` output.

    Args:
        inputFile (string): path of the wav file to analyse
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB: SNR1 over the whole signal, SNR2 over
        the signal with ``M`` samples removed from each end.
    """
    _, original = UF.wavread(inputFile)
    analysis_win = get_window(window, M, False)
    resynth = stft.stft(original, analysis_win, N, H)

    def snr_db(reference, rebuilt):
        # eps is added to the energy ratio before the logarithm.
        signal_energy = np.sum((abs(reference)**2))
        error_energy = np.sum((abs(rebuilt - reference)**2))
        return 10*np.log10(signal_energy / error_energy + eps)

    whole = snr_db(original, resynth)
    trimmed = snr_db(original[M:-M], resynth[M:-M])
    return whole, trimmed
Ejemplo n.º 5
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats: SNR1 is computed over the
            whole signal, SNR2 after dropping M samples at each end.
    """
    # The input sound and the analysis window have to be created here; the
    # previous body used them without ever defining them.
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    xs = stft.stft(x, w, N, H)

    def snr_db(ref, rec):
        """SNR in dB with ``ref`` as signal and ``ref - rec`` as noise."""
        e_signal = np.sum(np.abs(ref)**2)
        e_noise = np.sum(np.abs(ref - rec)**2)
        return 10 * np.log10(e_signal / e_noise + eps)

    srn = snr_db(x, xs)
    srn2 = snr_db(x[M:-M], xs[M:-M])
    return srn, srn2
Ejemplo n.º 6
0
    def __fdndlp(self, data):
        """Run delayed linear prediction bin by bin in the frequency domain.

        The input is scaled by its peak absolute value, transformed with
        ``stft.stft``, processed one frequency bin at a time by
        ``self.__ndlp`` and transformed back with ``stft.istft``.  The result
        is scaled by its own peak absolute value before being returned.
        ``self.freq_num`` is set to the size of the last STFT axis.

        Args:
            data: A 2-dimension numpy array with shape=(channels, samples)

        Returns:
            A 2-dimension numpy array with shape=(output_channels, samples)
        """
        input_peak = np.abs(data).max()
        spectra = stft.stft(data / input_peak,
                            frame_size=self.frame_size,
                            overlap=self.overlap)
        self.freq_num = spectra.shape[-1]

        # Start from a copy of the first out_num channels and overwrite it
        # bin by bin with the predicted output.
        processed = spectra[0:self.out_num].copy()
        for bin_idx in range(self.freq_num):
            bin_frames = spectra[:, :, bin_idx].T
            processed[:, :, bin_idx] = self.__ndlp(bin_frames).T

        restored = stft.istft(processed,
                              frame_size=self.frame_size,
                              overlap=self.overlap)
        output_peak = np.abs(restored).max()
        return restored / output_peak
Ejemplo n.º 7
0
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) of a sound against its ``stft.stft`` output.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` of floats; SNR2 ignores the first and last
        ``M`` samples.
    """
    fs, x = UF.wavread(inputFile)

    # Even-length windows are requested with fftbins=True, odd-length ones
    # with fftbins=False.
    use_fftbins = True if M % 2 == 0 else False
    w = get_window(window, M, fftbins=use_fftbins)
    y = stft.stft(x, w, N, H)

    # Whole-signal ratio.
    ratio_full = float(np.sum(x**2) / np.sum((x - y)**2))
    SNR1 = 10 * np.log10(ratio_full)

    # Ratio over the interior samples only.
    interior = slice(M, x.size - M)
    signal_mid = np.sum(x[interior]**2)
    noise_mid = np.sum((y[interior] - x[interior])**2)
    ratio_mid = float(signal_mid / noise_mid)
    SNR2 = 10 * np.log10(ratio_mid)

    return (SNR1, SNR2)
Ejemplo n.º 8
0
def computeSNR(inputFile, window, M, N, H):
    """Compute two SNR figures for the ``stft.stft`` output of a wav file.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB: the first over all samples, the second
        with ``M`` samples discarded at both ends.
    """
    _, samples = UF.wavread(inputFile)
    analysis_win = get_window(window, M)
    rebuilt = stft.stft(samples, analysis_win, N, H)

    def ratio_db(reference, estimate):
        signal_energy = np.sum(reference**2)
        noise_energy = np.sum((reference - estimate)**2)
        return 10 * np.log10(signal_energy / noise_energy)

    snr_full = ratio_db(samples, rebuilt)
    snr_inner = ratio_db(samples[M:-M], rebuilt[M:-M])
    return (snr_full, snr_inner)
Ejemplo n.º 9
0
    def __init__(self, file_name):
        """Load a trace, inject synthetic waves and build its fingerprint data.

        The data returned by ``self.GetFileData(file_name)`` is read, a wave
        generated by ``GenData([1, 600]).GenWave()`` is added at samples
        3000 and 9000 of the first trace, and the detrended result is passed
        through STFT, the wavelet/regularisation helpers and
        ``GetFingerPoint``.

        Args:
            file_name: Passed unchanged to ``self.GetFileData``.
        """
        from pysac import SacStreamIO
        import stft
        import numpy as np
        self.hash = []
        self.wlWinN = 60
        self.wlLagN = 6
        self.fqWinN = 60
        self.fqLagN = 10

        self.fqRspN = 32
        self.wlRspN = 32
        self.max900 = 0
        self.wl_x_level = 3
        # sac file read
        # The parameter is ``file_name``; the old ``fileName`` spelling raised
        # a NameError.
        self.sac_st = self.GetFileData(file_name)
        nize = GenData([1, 600])
        nzdt = nize.GenWave()
        import scipy.signal as ssg
        # Only the first trace is processed.
        for temp_data in self.sac_st[0:1]:

            # Add the generated wave at two fixed offsets (in place).
            temp_data[3000:3000 + 600] = temp_data[3000:3000 + 600] + nzdt[0]
            temp_data[9000:9000 + 600] = temp_data[9000:9000 + 600] + nzdt[0]
            self.sac_data = ssg.detrend(temp_data)
            # calculate stft
            fqData = stft.stft(self.sac_data, self.fqWinN, self.fqLagN,
                               self.fqRspN)
            fqData = np.abs(fqData)
            wlDataX = self.WaveLetX(fqData, level=3)
            self.wlData = self.WaveLetAndRegular(wlDataX, level=3)
            self.wlData = self.RegularY(self.wlData)
            self.wlData = self.TrimData(self.wlData)
            self.GetFingerPoint(2)
Ejemplo n.º 10
0
def computeSNR(inputFile, window, M, N, H):
    """Return whole-signal and interior SNR of the ``stft.stft`` output.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB; SNR2 skips ``M`` samples at each end.
    """
    fs, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)

    # This stft variant also takes the sampling rate.
    rebuilt = stft.stft(x, fs, analysis_win, N, H)
    residual = np.array(x - rebuilt)

    def energy_of(values):
        return np.sum( abs(values)**2 )

    interior = slice(M, x.size - M)
    full_signal, full_noise = energy_of(x), energy_of(residual)
    inner_signal = energy_of(x[interior])
    inner_noise = energy_of(residual[interior])

    SNR1 = 10 * np.log10(full_signal / full_noise)
    SNR2 = 10 * np.log10(inner_signal / inner_noise)
    return (SNR1, SNR2)
Ejemplo n.º 11
0
def computeSNR(inputFile, window, M, N, H):
    """Compare a sound with its ``stft.stft`` output and report the SNR.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB; the second value is computed after
        removing ``M`` samples from each end of both signals.
    """
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M, False)

    # STFT analysis followed by reconstruction.
    y = stft.stft(x, w, N, H)

    def snr_db(reference, rebuilt):
        # Noise is the magnitude of the sample-wise difference.
        noise = abs(rebuilt - reference)
        noise_energy = sum(noise**2)
        signal_energy = sum(reference**2)
        return 10*np.log10(signal_energy / noise_energy)

    snr_whole = snr_db(x, y)
    snr_segment = snr_db(x[M:x.size-M], y[M:y.size-M])
    return (snr_whole, snr_segment)
Ejemplo n.º 12
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  SNR1 covers the whole
            signal; SNR2 discards exactly M samples at the start and at the
            end.
    """

    def energy(X, k1, k2):
        """Sum of squares of X[k1:k2]."""
        X2 = np.power(X, 2)
        return np.sum(X2[k1:k2])

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    xsyn = stft.stft(x, fs, w, N, H)
    noise = np.subtract(xsyn, x)

    Esignal1 = energy(x, 0, len(x))
    Enoise1 = energy(noise, 0, len(noise))
    SNR1 = 10*np.log10(Esignal1/Enoise1)

    # Keep samples M .. len-M-1, i.e. drop M samples per side.  The previous
    # bounds (M+1, len-M-1) dropped one extra sample at each end.
    Esignal2 = energy(x, M, len(x)-M)
    Enoise2 = energy(noise, M, len(noise)-M)
    SNR2 = 10*np.log10(Esignal2/Enoise2)

    return SNR1, SNR2
Ejemplo n.º 13
0
def computeSNR(inputFile, window, M, N, H):
    """Report the SNR between a wav file and its ``stft.stft`` output.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB, the second one restricted to
        ``[M:-M]``.
    """
    _, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)
    residual = x - stft.stft(x, analysis_win, N, H)

    results = []
    # First the full signal, then the interior with M samples cut per side.
    for region in (slice(None), slice(M, -M)):
        signal_energy = np.sum(x[region]**2)
        noise_energy = np.sum(residual[region]**2)
        results.append(10 * np.log10(signal_energy / noise_energy))

    snr_full, snr_inner = results
    return snr_full, snr_inner
Ejemplo n.º 14
0
def computeSNR(inputFile, window, M, N, H):
    """Compute SNR values for the output of ``stft`` on a wav file.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB; SNR2 leaves out ``M`` samples at both
        ends of each signal.
    """
    _, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)

    # The sampling-rate argument of this stft variant is fixed to 1.0.
    rebuilt = stft(x, 1.0, analysis_win, N, H)

    def snr_db(reference, estimate):
        signal_energy = sum(reference**2)
        noise_energy = sum((reference-estimate)**2)
        return 10.0*np.log10(signal_energy/noise_energy)

    SNR1 = snr_db(x, rebuilt)
    SNR2 = snr_db(x[M:len(x)-M], rebuilt[M:len(rebuilt)-M])
    return (SNR1,SNR2)
Ejemplo n.º 15
0
 def __init__(self,file_name):
     """Read a SAC file and run the fingerprinting pipeline on it.

     The trace is loaded and detrended through ``SacStreamIO``, its STFT
     magnitude is passed through ``WaveLetX``, ``RegularY`` and ``TrimData``,
     and ``GetFingerPoint(2)`` is called last.  A progress message is printed
     after each stage.

     Args:
         file_name: Passed to ``SacStreamIO``.
     """
     from pysac import SacStreamIO
     import stft
     import numpy as np

     # Window / lag / resampling settings for the two transforms.
     self.hash = []
     self.wlWinN = 100
     self.wlLagN = 10
     self.fqWinN = 100
     self.fqLagN = 50
     self.fqRspN = 64
     self.wlRspN = 64
     self.max900 = 0
     self.wl_x_level = 3

     # Load the trace and keep its sampling interval and samples.
     stream = SacStreamIO(file_name)
     stream.DataDetrend()
     self.sac_delta = stream.delta
     self.sac_data = stream.yVect
     print("Sac File Read Finished!")

     # Magnitude of the short-time Fourier transform.
     magnitude = np.abs(
         stft.stft(self.sac_data, self.fqWinN, self.fqLagN, self.fqRspN))
     print("STFT Trans Finished!")

     self.wlData = self.WaveLetX(magnitude, level=3)
     print("Wavelet X trans Finished!")
     # The WaveLetAndRegular stage is disabled; its message is still printed.
     print("Wavelet Trans Finished!")

     self.wlData = self.RegularY(self.wlData)
     print("Regular Finished!")

     self.wlData = self.TrimData(self.wlData)
     print("Bit Trans Finished!")

     self.GetFingerPoint(2)
     print("Hash Trans Finished!")
Ejemplo n.º 16
0
def computeSNR(inputFile, window, M, N, H):
    """Compute two SNR values for the ``stft.stft`` output of a wav file.

    The actual ratio is delegated to ``computeSNR_(signal, noise)``.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)``; SNR2 is evaluated on ``[M:-M]`` only.
    """
    _, x = UF.wavread(inputFile)

    # Analysis/synthesis round trip and the resulting error signal.
    analysis_win = get_window(window, M)
    residual = x - stft.stft(x, analysis_win, N, H)

    whole = computeSNR_(x, residual)
    interior = computeSNR_(x[M:-M], residual[M:-M])
    return whole, interior
def computeSNR(inputFile, window, M, N, H):
    """Return SNR values of a sound versus its ``stft.stft`` output.

    The second value is obtained by zeroing the first and last ``M`` samples
    of copies of both signals, rather than by slicing them.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` of floats in dB.
    """
    analysis_win = get_window(window, M)
    _, x = UF.wavread(inputFile)

    signal_energy = np.sum(np.square(x))
    y = stft.stft(x, analysis_win, N, H)
    error = y - x

    # Work on copies so the edge samples can be blanked out.
    y_blanked = y.copy()
    x_blanked = x.copy()
    for buf in (y_blanked, x_blanked):
        buf[:M] = 0.0
        buf[-M:] = 0.0
    signal_energy_blanked = np.sum(np.square(x_blanked))
    error_blanked = y_blanked - x_blanked

    noise_energy = np.sum(np.square(error))
    noise_energy_blanked = np.sum(np.square(error_blanked))

    SNR1 = 10.0 * np.log10(signal_energy / noise_energy)
    SNR2 = 10.0 * np.log10(signal_energy_blanked / noise_energy_blanked)
    return (SNR1, SNR2)
Ejemplo n.º 18
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  The input x is the
            signal and y - x the noise; SNR2 uses samples [M:-M] only.
    """

    # read from the file
    FS, x = UF.wavread(inputFile)

    w = get_window(window, M)
    # do a stft computation
    y = stft.stft(x, FS, w, N, H)

    diff = y - x

    # SNR over the complete signal.  The signal energy comes from the input x,
    # not from the reconstruction y.
    energy_signal = (x**2).sum()
    energy_noise = (diff**2).sum()
    SNR1 = 10 * np.log10(energy_signal/energy_noise)

    # SNR over the sliced signal
    energy_signal_sliced = (x[M:-M]**2).sum()
    energy_noise_sliced = (diff[M:-M]**2).sum()
    SNR2 = 10 * np.log10(energy_signal_sliced/energy_noise_sliced)

    return (SNR1, SNR2)
Ejemplo n.º 19
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  The input x is the
            signal and x - y the noise; SNR2 uses samples [M:-M] only.
    """

    # NOTE(review): an odd M is reduced by one before it is used for the
    # window and for the slicing below, so the documented odd length is never
    # applied as given.  Kept as found -- confirm whether this is intended.
    if M % 2:
        M = M - 1

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    x2 = x[M:-M]
    y2 = y[M:-M]

    # Signal energy is taken from the input (x), not from the reconstruction.
    snr_full = 10*np.log10(energy(x) / energy(x - y))
    snr_sliced = 10*np.log10(energy(x2) / energy(x2 - y2))
    return snr_full, snr_sliced
Ejemplo n.º 20
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  SNR1 covers every sample,
            SNR2 the samples M .. x.size-M-1.
    """
    w = get_window(window, M)  # get the window
    (fs, x) = UF.wavread(inputFile)
    # x: input sound, w: analysis window, N: FFT size, H: hop size
    # returns y: output sound
    y = stft.stft(x, fs, w, N, H)

    # Noise is the sample-wise difference.  Subtracting magnitudes
    # (|x| - |y|) would hide every sign error in the reconstruction.
    noise = x - y[:x.size]

    def energy(values):
        """Sum of squared magnitudes."""
        return np.sum(np.abs(values) ** 2)

    SNR1 = 10 * np.log10(energy(x) / energy(noise))

    # Drop exactly M samples on both sides; the old test ``i > M`` skipped
    # one extra sample at the start only.
    inner = slice(M, x.size - M)
    SNR2 = 10 * np.log10(energy(x[inner]) / energy(noise[inner]))
    return SNR1, SNR2
Ejemplo n.º 21
0
def spectrum(signal, Fs, N):
    """Plot the spectrogram of ``signal``.

    The STFT is computed with frame length ``N``, a hop of half a frame and
    a Hann window, and handed to ``stft.spectroplot``.

    Args:
        signal: The signal to analyse.
        Fs: Sampling frequency, forwarded to ``stft.spectroplot``.
        N: STFT frame length.
    """

    import stft
    import windows

    # Floor division keeps the hop an integer on Python 3 (``N / 2`` is a
    # float there).
    hop = N // 2

    F = stft.stft(signal, N, hop, win=windows.hann(N))
    stft.spectroplot(F.T, N, hop, Fs)
Ejemplo n.º 22
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  SNR1 covers the whole
            signal, SNR2 the samples [M:-M].
    """

    # Even-length windows are requested as periodic (fftbins=True),
    # odd-length ones as symmetric.
    periodicWindow = (M % 2 == 0)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M, fftbins=periodicWindow)
    y = stft.stft(x, w, N, H)
    noise = y - x

    # eps keeps both ratios finite when an energy is exactly zero.
    Esignal = np.sum(np.square(x)) + eps
    Enoise = np.sum(np.square(noise)) + eps
    SNR1 = 10 * np.log10(Esignal / Enoise)

    # Same guard for the sliced signal, which previously had none.
    Esignal2 = np.sum(np.square(x[M:-M])) + eps
    Enoise2 = np.sum(np.square(noise[M:-M])) + eps
    SNR2 = 10 * np.log10(Esignal2 / Enoise2)

    return (SNR1, SNR2)
Ejemplo n.º 23
0
def computeSNR(inputFile, window, M, N, H):
    """Compute full and interior SNR of the ``stft.stft`` output of a sound.

    Energies are obtained from the ``energy`` helper.

    Args:
        inputFile (string): wav file name including the path
        window (string): analysis window type (rectangular, triangular,
                hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window length (odd positive integer)
        N (integer): fft size (power of two, > M)
        H (integer): hop size for the stft computation

    Returns:
        Tuple ``(SNR1, SNR2)`` in dB; SNR2 is restricted to ``[M:-M]``.
    """
    fs, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)

    # This stft variant also takes the sampling rate.
    rebuilt = stft.stft(x, fs, analysis_win, N, H)

    signal_full = energy(x)
    signal_inner = energy(x[M:-M])
    noise_full = energy(x-rebuilt)
    noise_inner = energy((x-rebuilt)[M:-M])

    return (10 * np.log10(signal_full / noise_full),
            10 * np.log10(signal_inner / noise_inner))
Ejemplo n.º 24
0
def computeSNR(inputFile, window, M, N, H):
    """Compute the SNR of the ``stft.stft`` output relative to the input sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) of floats.  The input x is the
            signal and x - y the noise; SNR2 uses samples [M:-M] only.
    """

    (fs, x) = UF.wavread(inputFile)

    w = get_window(window, M)

    y = stft.stft(x, w, N, H)

    # Noise energy is the energy of the difference signal.  The difference of
    # the two energies (|Ey - Ex|) is not a noise measure: it is zero for any
    # y with the same energy as x, however different.
    noise = x - y

    # SNR1 over the whole signal
    SNR1 = 10 * np.log10(np.sum(np.abs(x)**2) / np.sum(np.abs(noise)**2))

    # SNR2 with M samples removed from each end (was a hard-coded 6 at the
    # end).  The arbitrary +5.0 / +200.0 offsets on the results are gone.
    xshort = x[M:x.size - M]
    nshort = noise[M:x.size - M]
    SNR2 = 10 * np.log10(np.sum(np.abs(xshort)**2) / np.sum(np.abs(nshort)**2))

    return SNR1, SNR2
Ejemplo n.º 25
0
    def find_peaks(self, d, sr):
        """ Locate spectrogram peaks that serve as fingerprint landmarks.

        :params:
          d - np.array of float
            Input waveform as 1D vector

          sr - int
            Sampling rate of d (not used)

        :returns:
          pklist - list of (int, int)
            Peaks ordered by time frame.  Each pair is
            (STFT frame index, FFT bin index).  An empty list is returned
            for an empty input.
        """
        if len(d) == 0:
            return []

        # Decay constant of the masking envelope.
        decay = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)

        # Magnitude spectrogram using a Hann window without its end points.
        hann = np.hanning(self.n_fft + 2)[1:-1]
        sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
                                 hop_length=self.n_hop,
                                 window=hann))

        peak_magnitude = np.max(sgram)
        if peak_magnitude > 0.0:
            # Log magnitude, floored at peak / 1e6, then mean-removed.
            sgram = np.log(np.maximum(sgram, peak_magnitude / 1e6))
            sgram = sgram - np.mean(sgram)
        else:
            # An all-zero spectrogram is let through with a warning only.
            print("find_peaks: Warning: input signal is identically zero.")

        # Forward pass: keep local maxima above a decaying threshold.
        peaks = self._decaying_threshold_fwd_prune(sgram, decay)
        # Backward pass: drop small peaks closely followed by a large one.
        peaks = self._decaying_threshold_bwd_prune_peaks(sgram, peaks, decay)

        # Collect (frame, bin) pairs column by column.
        n_frames = np.shape(sgram)[1]
        return [(col, bin_)
                for col in range(n_frames)
                for bin_ in np.nonzero(peaks[:, col])[0]]
Ejemplo n.º 26
0
def itakura_saito(x1, x2, sigma2_n, stft_L=128, stft_hop=128):
  """Return the median Itakura-Saito divergence between two signals.

  The power spectrograms of ``x1`` and ``x2`` are compared row by row
  (rows are presumably STFT frames -- confirm against ``stft``). Only rows
  whose mean power exceeds ``2 * stft_L**2 * sigma2_n`` in at least one of
  the two signals contribute.

  Args:
    x1, x2: signals handed to ``stft``.
    sigma2_n: noise variance used in the activity threshold.
    stft_L: second positional argument of ``stft``.
    stft_hop: third positional argument of ``stft``.

  Returns:
    Median, over the selected rows, of the row-wise mean of
    ``R - log(R) - 1`` where ``R = P1 / P2``.

  Raises:
    ValueError: if the two power spectrograms differ in shape.
  """
  P1 = np.abs(stft(x1, stft_L, stft_hop))**2
  P2 = np.abs(stft(x2, stft_L, stft_hop))**2

  # Validate before combining the activity masks: np.logical_or below would
  # otherwise fail first with an unrelated broadcasting error.
  if P1.shape[0] != P2.shape[0] or P1.shape[1] != P2.shape[1]:
    raise ValueError("Error: Itakura-Saito requires both array to have same length")

  # A row is active when its mean power is above the noise-derived threshold.
  threshold = 2*stft_L**2*sigma2_n
  VAD = np.logical_or(P1.mean(axis=1) > threshold, P2.mean(axis=1) > threshold)

  R = P1[VAD,:]/P2[VAD,:]

  IS = (R - np.log(R) - 1.).mean(axis=1)

  return np.median(IS)
Ejemplo n.º 27
0
 def illustrate_match(self, analyzer, ht, filename):
     """ Show the query fingerprints and the matching ones
         plotted over a spectrogram, then save the figure as a PNG.

     :params:
       analyzer - object providing target_sr, n_fft, n_hop and
         wavfile2hashes()
       ht - hash table passed to self.match_hashes(); its ``names``
         attribute is indexed with the id of the best match
       filename - path of the query audio file

     :returns:
       results - the match list returned by self.match_hashes(), sorted
         by descending third field when self.sort_by_time is set
     """
     # Make the spectrogram
     # d, sr = librosa.load(filename, sr=analyzer.target_sr)
     d, sr = audio_read.audio_read(filename,
                                   sr=analyzer.target_sr,
                                   channels=1)
     sgram = np.abs(
         stft.stft(d,
                   n_fft=analyzer.n_fft,
                   hop_length=analyzer.n_hop,
                   window=np.hanning(analyzer.n_fft + 2)[1:-1]))
     # Convert to dB with a floor 120 dB below the peak, then remove the mean
     sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
     sgram = sgram - np.mean(sgram)
     # High-pass filter onset emphasis
     # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits
     # spectrogram enhancement
     if self.illustrate_hpf:
         HPF_POLE = 0.98
         sgram = np.array([
             scipy.signal.lfilter([1, -1], [1, -HPF_POLE], s_row)
             for s_row in sgram
         ])[:-1, ]
     # Shift so the maximum sits at 0 dB, matching vmin/vmax below
     sgram = sgram - np.max(sgram)
     librosa.display.specshow(sgram,
                              sr=sr,
                              hop_length=analyzer.n_hop,
                              y_axis='linear',
                              x_axis='time',
                              cmap='gray_r',
                              vmin=-80.0,
                              vmax=0)
     # Do the match?
     q_hashes = analyzer.wavfile2hashes(filename)
     # Run query, get back the hashes for match zero
     results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
     if self.sort_by_time:
         results = sorted(results, key=lambda x: -x[2])
     # Convert the hashes to landmarks
     lms = audfprint_analyze.hashes2landmarks(q_hashes)
     mlms = audfprint_analyze.hashes2landmarks(matchhashes)
     # Overplot on the spectrogram: query landmarks in green, matched in red
     plt.plot(
         np.array([[x[0], x[0] + x[3]] for x in lms]).T,
         np.array([[x[1], x[2]] for x in lms]).T, '.-g')
     plt.plot(
         np.array([[x[0], x[0] + x[3]] for x in mlms]).T,
         np.array([[x[1], x[2]] for x in mlms]).T, '.-r')
     # Add title
     # NOTE(review): assumes at least one result and a name shaped like
     # "<dir>/<track>.<ext>" -- confirm against how ht.names is filled.
     plt.title("Matched as " +
               ht.names[results[0][0]].split("/")[1].split(".")[0])
     # Display
     # The keyword is ``bbox_inches``; the previous spelling ``bbox_inces``
     # was not a valid savefig option.
     plt.savefig("./src/static/sgram" + uuid.uuid4().hex + ".png",
                 bbox_inches="tight")
     # plt.show()
     # Return
     return results
Ejemplo n.º 28
0
 def test_consistency(self):
   """The strongest bin of a steady 440 Hz sine must not jump between frames.

   For every STFT frame the index of the largest value among the first
   ``framesz // 2`` bins is taken; consecutive frames may differ by at
   most one bin.
   """
   x = sin(scipy.linspace(0,1,44100) * 2 *  scipy.pi * 440)
   framesz = 1024
   X = stft(x, framesz)
   # Floor division: a float slice bound raises TypeError on Python 3.
   half = framesz // 2
   indices = [numpy.argmax(X[i][:half]) for i in range(len(X))]
   previous = indices[0]
   for val in indices[1:]:
     self.assertTrue(abs(abs(val)-abs(previous)) <= 1)
     previous = val
Ejemplo n.º 29
0
def itakura_saito(x1, x2, sigma2_n, stft_L=128, stft_hop=128):
    """Return the median Itakura-Saito divergence between two signals.

    The power spectrograms of ``x1`` and ``x2`` are compared row by row
    (rows are presumably STFT frames -- confirm against ``stft``). Only rows
    whose mean power exceeds ``2 * stft_L**2 * sigma2_n`` in at least one of
    the two signals contribute.

    Args:
        x1, x2: signals handed to ``stft``.
        sigma2_n: noise variance used in the activity threshold.
        stft_L: second positional argument of ``stft``.
        stft_hop: third positional argument of ``stft``.

    Returns:
        Median, over the selected rows, of the row-wise mean of
        ``R - log(R) - 1`` where ``R = P1 / P2``.

    Raises:
        ValueError: if the two power spectrograms differ in shape.
    """
    P1 = np.abs(stft(x1, stft_L, stft_hop))**2
    P2 = np.abs(stft(x2, stft_L, stft_hop))**2

    # Validate before combining the activity masks: np.logical_or below would
    # otherwise fail first with an unrelated broadcasting error.
    if P1.shape[0] != P2.shape[0] or P1.shape[1] != P2.shape[1]:
        raise ValueError(
            "Error: Itakura-Saito requires both array to have same length")

    # A row is active when its mean power is above the noise-derived threshold.
    threshold = 2 * stft_L**2 * sigma2_n
    VAD = np.logical_or(P1.mean(axis=1) > threshold,
                        P2.mean(axis=1) > threshold)

    R = P1[VAD, :] / P2[VAD, :]

    IS = (R - np.log(R) - 1.).mean(axis=1)

    return np.median(IS)
Ejemplo n.º 30
0
def get_stft(x, wsize=512, tstep=256, sigma=None):
    """Return the squeezed STFT of a signal, loading it from disk if needed.

    Args:
        x: either an array of samples or a path (``str``); a path is loaded
            through ``Signal(x, mono=True, normalize=True)``.
        wsize: second positional argument of ``stft.stft``.
        tstep: third positional argument of ``stft.stft``.
        sigma: if not ``None``, standard deviation of white Gaussian noise
            added to the samples before the transform.

    Returns:
        ``np.squeeze`` of the array returned by ``stft.stft``.
    """
    if isinstance(x, str):
        x = Signal(x, mono=True, normalize=True).data

    if sigma is not None:
        # Out-of-place addition: ``x += ...`` would silently modify the
        # caller's array and fails outright for integer-typed samples.
        x = x + sigma * np.random.randn(*x.shape)

    return np.squeeze(stft.stft(x, wsize, tstep))
Ejemplo n.º 31
0
    def find_peaks(self, d, sr):
        """ Locate spectrogram peaks to be used as fingerprint landmarks.

        :params:
          d - np.array of float
            Input waveform as 1D vector

          sr - int
            Sampling rate of d (not used)

        :returns:
          pklist - list of (int, int)
            Peaks ordered by column.  Each pair is (column index of the
            filtered spectrogram, row index of the peak within that
            column); an empty list is returned for empty input.
        """
        if len(d) == 0:
            return []

        # Decay factor applied to the masking envelope by the pruning passes
        decay = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)
        # Magnitude spectrogram; the window is a Hanning window with its two
        # zero end-points dropped
        analysis_win = np.hanning(self.n_fft + 2)[1:-1]
        sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
                                 hop_length=self.n_hop,
                                 window=analysis_win))
        peak_mag = np.max(sgram)
        if peak_mag > 0.0:
            # Log-magnitude floored at 1e-6 of the peak, then mean-removed
            floored = np.maximum(sgram, peak_mag / 1e6)
            sgram = np.log(floored)
            sgram = sgram - np.mean(sgram)
        else:
            # All-zero spectrogram means the input was all zero; carry on.
            print("find_peaks: Warning: input signal is identically zero.")
        # Onset emphasis: high-pass filter every row, then drop the last
        # row (nyquist) so bin indices fit in 8 bits
        emphasised = []
        for s_row in sgram:
            emphasised.append(
                scipy.signal.lfilter([1, -1],
                                     [1, -HPF_POLE ** (1 / OVERSAMP)], s_row))
        sgram = np.array(emphasised)[:-1, ]
        # Forward pass keeps local maxima above a decaying threshold; the
        # backward pass removes small peaks closely followed by a large one
        peaks = self._decaying_threshold_fwd_prune(sgram, decay)
        peaks = self._decaying_threshold_bwd_prune_peaks(sgram, peaks, decay)
        # Collect the surviving peaks column by column
        n_cols = np.shape(sgram)[1]
        return [(col, bin_)
                for col in range(n_cols)
                for bin_ in np.nonzero(peaks[:, col])[0]]
Ejemplo n.º 32
0
def calc_sp(audio, fft_size, hop_size, window):
    """Return the complex spectrogram of *audio* cast to ``complex64``.

    ``stft.stft`` is called in ``'complex'`` mode with ``fft_size`` as its
    window size, ``hop_size`` as its hop and ``window`` as its window.
    """
    stft_kwargs = dict(x=audio,
                       window_size=fft_size,
                       hop_size=hop_size,
                       window=window,
                       mode='complex')
    return stft.stft(**stft_kwargs).astype(np.complex64)
def read_and_nmf(input_file):
    """
    Read an audio file, mix it to mono and factorize its magnitude STFT.

    :param input_file: file to be read
    :return: w (components from nmf)

    NOTE(review): the sample rate returned by ``read`` is ignored; the
    module-level ``fs`` is what gets passed to ``transform.stft``.
    """
    (rate, data) = read(input_file)
    samples = np.asarray(data)
    # Promote integer PCM to float before mixing: adding two int16 channels
    # wraps around on loud passages.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)
    if samples.ndim == 1:
        # Already mono
        bee_data = samples
    else:
        bee_data = (samples[:, 0] + samples[:, 1]) / 2.0
    # Peak-normalize only when the signal leaves [-1, 1]
    peak = np.max(np.abs(bee_data))
    if peak > 1:
        bee_data = bee_data / float(peak)
    X = transform.stft(bee_data, fs, framesz, hop)
    M = abs(X)
    w, h = nmf.factorize(M, pc=NUM_COMPONENTS, iterations=ITERATIONS)
    return w
Ejemplo n.º 34
0
def read_and_nmf(input_file):
    """
    Read an audio file, mix it to mono and factorize its magnitude STFT.

    :param input_file: file to be read
    :return: w (components from nmf)

    NOTE(review): the sample rate returned by ``read`` is ignored; the
    module-level ``fs`` is what gets passed to ``transform.stft``.
    """
    (rate, data) = read(input_file)
    samples = np.asarray(data)
    # Promote integer PCM to float before mixing: adding two int16 channels
    # wraps around on loud passages.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)
    if samples.ndim == 1:
        # Already mono
        bee_data = samples
    else:
        bee_data = (samples[:, 0] + samples[:, 1]) / 2.0
    # Peak-normalize only when the signal leaves [-1, 1]
    peak = np.max(np.abs(bee_data))
    if peak > 1:
        bee_data = bee_data / float(peak)
    X = transform.stft(bee_data, fs, framesz, hop)
    M = abs(X)
    w, h = nmf.factorize(M, pc=NUM_COMPONENTS, iterations=ITERATIONS)
    return w
def create_audio_spectrogram(audio, window_size, window_overwrap_rate,
                             clipping_threshold):
    """Turn an audio object into a clipped spectral feature.

    The samples come from ``audio.to_numpy()``; the hop between frames is
    ``int(window_size * window_overwrap_rate)``.  The STFT is then passed
    through ``stft.to_feature`` together with ``clipping_threshold``.
    """
    samples = audio.to_numpy()
    hop = int(window_size * window_overwrap_rate)
    raw_spectrum = stft.stft(samples, window_size, hop)
    return stft.to_feature(raw_spectrum, clipping_threshold)
Ejemplo n.º 36
0
def pvoc(x,
         sr,
         factor,
         Hs=512,
         window=signal.hann(1024, sym=False),
         phase_lock=False):
    """Time-scale ``x`` by ``factor`` with phase-propagated STFT frames.

    Args:
        x: 1-D input signal (only ``x.shape[0]`` is used for its length).
        sr: forwarded unchanged to ``stft.stft``.
        factor: output length is ``ceil(factor * len(x))`` samples.
        Hs: synthesis hop in samples.
        window: analysis/synthesis window; its length sets the frame size.
            The default array is built once, when the function is defined.
        phase_lock: when true, phases are assigned per peak region using
            ``get_peaks`` instead of per bin.

    Returns:
        Whatever ``stft.istft(Y, Hs, window)`` returns for the modified
        spectrogram ``Y``.
    """
    in_size = x.shape[0]
    win_len = window.shape[0]
    win_len_half = int(np.round(win_len / 2))
    out_size = int(np.ceil(factor * in_size))
    # Linear time map: first input sample -> first output sample,
    # last input sample -> last output sample.
    anchor_points = np.array([[0, 0], [in_size - 1, out_size - 1]])
    # Frame positions on the output timeline, Hs samples apart ...
    syn_positions = np.arange(0, out_size + win_len_half, Hs)
    # ... and the matching (rounded) positions on the input timeline.
    an_positions = np.round(
        np.interp(syn_positions, anchor_points[:, 1], anchor_points[:, 0]))
    # Hop between consecutive analysis positions; the first entry is 0.
    an_hops = np.concatenate(([0], an_positions[1:] - an_positions[:-1]))
    # NOTE(review): this buffer is never read; ``y`` is reassigned from
    # stft.istft() at the end of the function.
    y = np.zeros((out_size + 2 * win_len))
    # Zero-pad half a window in front and a window plus one analysis hop
    # at the back.
    x = np.concatenate((np.zeros(
        (win_len_half)), x, np.zeros((win_len + int(an_hops[1])))))

    X = stft.stft(x, sr, an_positions, window, win_len)
    Y = np.zeros_like(X)
    # The first output frame is copied unchanged.
    Y[:, 0] = X[:, 0]  # assuming columns are frames
    k = np.arange(win_len_half + 1).T
    # Centre frequency of bin k in radians per sample.
    omega = 2 * np.pi * k / win_len
    # NOTE(review): these two prints look like leftover debug output.
    print(an_hops[1])
    print(an_hops[-1])
    for i in range(1, X.shape[1]):
        # Phase advance expected for each bin over this analysis hop.
        dphi = omega * an_hops[i]
        current_phase = np.angle(X[:, i])
        prev_phase = np.angle(X[:, i - 1])
        # Deviation from the expected advance, wrapped to [-pi, pi].
        phase_inc = current_phase - prev_phase - dphi
        phase_inc = phase_inc - 2 * np.pi * np.round(phase_inc / (2 * np.pi))
        # Estimated per-sample phase rate, then the advance over one
        # synthesis hop.
        # NOTE(review): divides by an_hops[i]; a zero hop would give
        # inf/nan here -- confirm it cannot occur for the factors in use.
        ipa_sample = omega + phase_inc / an_hops[i]
        ipa_hop = ipa_sample * Hs
        syn_phase = np.angle(Y[:, i - 1])
        if not phase_lock:
            # Per-bin rotation that moves the analysis phase onto the
            # propagated synthesis phase.
            theta = syn_phase + ipa_hop - current_phase
            phasor = np.exp(1j * theta)
        else:
            # One rotation per peak p[j], applied to bins v[j]:v[j + 1].
            # NOTE(review): relies on get_peaks returning one more entry
            # in ``v`` than in ``p`` -- confirm against its definition.
            p, v = get_peaks(np.abs(X[:, i]))
            theta = np.zeros_like(Y[:, i])
            for j in range(len(p)):
                theta[v[j]:v[j + 1]] = syn_phase[p[j]] + ipa_hop[
                    p[j]] - current_phase[p[j]]
            phasor = np.exp(1j * theta)
        Y[:, i] = phasor * X[:, i]
    y = stft.istft(Y, Hs, window)
    return y
Ejemplo n.º 37
0
def test():
    """Compute and display the magnitude spectrogram of ``../golf_D.wav``."""
    wavfile = "../golf_D.wav"
    data, fs = wavread(wavfile)

    ### STFT
    fftLen = 1024
    win = hanning(fftLen)
    # Floor division keeps the hop and the slice bound integers; true
    # division yields floats on Python 3, which cannot be used as indices.
    step = fftLen // 8
    spectrogram = abs(stft(data, win, step)[:, :fftLen // 2 + 1]).T

    ### Display
    fig = pl.figure()
    fig.patch.set_alpha(0.)
    imshow_sox(spectrogram)
    pl.tight_layout()
    pl.show()
Ejemplo n.º 38
0
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.
            (0, 0) is returned, after printing both lengths, when the
            resynthesized signal does not have the input's length.
    """
    # read input sound (monophonic with sampling rate of 44100)
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)
    # ``!=`` and the print() call form work on both Python 2 and 3.
    if len(x) != len(y):
        print(' x ' + str(len(x)) + ' y ' + str(len(y)))
        return 0, 0

    #********************SNR1************************
    # Energy of the whole signal against the energy of the error x - y
    Ex = np.sum(x**2)
    noise = np.abs(x - y)
    Enoise = np.sum(noise**2)
    SNR1 = 10 * np.log10(Ex / Enoise)

    #**********************SNR2***********************
    # Same ratio with M samples dropped at each end.  x and y have equal
    # length here, so the trimmed slices do too.
    xp = x[M:-M]
    yp = y[M:-M]

    Exp = np.sum(xp**2)
    noisep = np.abs(xp - yp)
    Enoisep = np.sum(noisep**2)
    SNR2 = 10 * np.log10(Exp / Enoisep)

    return SNR1, SNR2
def test():
    """Compute and display the magnitude spectrogram of ``./golf_D.wav``."""
    # wavfile = "../wav/aiueo.wav"
    wavfile = "./golf_D.wav"
    # data, fs, enc = wavread(wavfile)
    data, fs = wavread(wavfile)

    ### STFT
    fftLen = 1024
    win = hanning(fftLen)
    # Floor division keeps the hop and the slice bound integers; true
    # division yields floats on Python 3, which cannot be used as indices.
    step = fftLen // 8
    spectrogram = abs(stft(data, win, step)[:, : fftLen // 2 + 1]).T

    ### Display
    fig = pl.figure()
    fig.patch.set_alpha(0.)
    imshow_sox(spectrogram)
    pl.tight_layout()
    pl.show()
Ejemplo n.º 40
0
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    The noise is the sample-wise error x - y, so y must have the same
    length as x (presumably guaranteed by stft.stft -- confirm).
    """
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    # SNR over the whole signal.  The noise energy is the energy of the
    # error signal, not the difference between the two signals' energies
    # (which is near zero even when y is badly misaligned with x).
    Ein = np.sum(np.abs(x) ** 2)
    Enos = np.sum(np.abs(x - y) ** 2)
    SNR1 = 10 * np.log10(Ein / Enos)

    # Same measure with M samples dropped at each end
    xsub = x[M:-M]
    ysub = y[M:-M]
    Eins = np.sum(np.abs(xsub) ** 2)
    Enoss = np.sum(np.abs(xsub - ysub) ** 2)
    SNR2 = 10 * np.log10(Eins / Enoss)

    print(SNR1, SNR2)
    return (SNR1, SNR2)
Ejemplo n.º 41
0
    def predict_channel(audio):
        # Estimate four sources (vocals, bass, drums, other) for one channel
        # by masking its STFT with the output of ``model``.
        # Reads hop_size, win_size, fft_size, n_frames and model from the
        # enclosing scope.  Returns a 4-tuple of estimates, each cut to the
        # first ``length`` rows.
        length = np.shape(audio)[0]
        # NOTE(review): presumably downsamples 44.1 kHz -> 22.05 kHz;
        # confirm the argument order of resample().
        m = resample(audio, 44100, 22050)
        M = stft(m.reshape(-1, 1), hop_size, win_size, fft_size)
        # Magnitudes, transposed so that rows index spectrogram frames
        Mmag = np.abs(M).T
        spec_frames, n_bins = Mmag.shape
        # Zero rows added on both sides so every frame gets a full context
        pad_size = int((n_frames - 1) / 2)
        Mmag = np.concatenate((np.zeros(
            (pad_size, n_bins)), Mmag, np.zeros((pad_size, n_bins))))
        # Overlapping-window view without copying: entry [i] covers rows
        # i .. i + n_frames - 1 of the padded magnitudes.
        new_strides = (Mmag.strides[0], Mmag.strides[0], Mmag.strides[1])
        Mmag = as_strided(Mmag, (spec_frames, n_frames, n_bins), new_strides)
        # Insert a singleton axis after the frame axis
        Mmag = Mmag[:, np.newaxis, :, :]
        vocals = np.zeros(M.T.shape)
        bass = np.zeros(M.T.shape)
        drums = np.zeros(M.T.shape)
        other = np.zeros(M.T.shape)

        for i in range(spec_frames):
            X = Mmag[i, :, :, :]
            # Leading axis of size 1 added before handing the window to torch
            in_data = torch.from_numpy(
                X.astype(np.float32)[np.newaxis, :, :, :])
            if torch.cuda.is_available():
                in_data = in_data.cuda()
            i_result = model(Variable(in_data)).cpu().data.numpy()
            # The output row holds four n_bins-wide segments in the order
            # vocals, drums, bass, other.
            vocals[i, :] = i_result[0, :n_bins]
            drums[i, :] = i_result[0, n_bins:2 * n_bins]
            bass[i, :] = i_result[0, 2 * n_bins:3 * n_bins]
            other[i, :] = i_result[0, 3 * n_bins:4 * n_bins]

        # Normalize so the four masks sum to one in every bin.
        # NOTE(review): a bin where all four outputs are zero divides by zero.
        all_masks = vocals + bass + drums + other

        vocals = vocals / all_masks
        bass = bass / all_masks
        drums = drums / all_masks
        other = other / all_masks
        # Mask the complex STFT, invert it, resample and trim to the input length
        vocal_est = resample(istft(M * vocals.T, hop_size, win_size, 22050),
                             22050, 44100, 0)[:length, :]
        bass_est = resample(istft(M * bass.T, hop_size, win_size, 22050),
                            22050, 44100, 0)[:length, :]
        drums_est = resample(istft(M * drums.T, hop_size, win_size, 22050),
                             22050, 44100, 0)[:length, :]
        other_est = resample(istft(M * other.T, hop_size, win_size, 22050),
                             22050, 44100, 0)[:length, :]
        return (vocal_est, bass_est, drums_est, other_est)
Ejemplo n.º 42
0
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
      inputFile (string): input sound file (monophonic with sampling rate of 44100)
      window (string): analysis window type (choice of rectangular, triangular,
          hanning, hamming, blackman, blackmanharris)
      M (integer): analysis window length (odd positive integer)
      N (integer): fft size (power of two, such that N > M)
      H (integer): hop size for the stft computation
    Output:
      Currently the tuple (x, x_ret): the input signal and the signal
      returned by stft.stft, trimmed when len(x) is not a multiple of H.
      Both are also plotted.
      TODO: the SNR computation is disabled; once x and x_ret are aligned,
      return (SNR1, SNR2) computed from x - x_ret over the full signal and
      over x[M:-M].
    Raises:
      ValueError: if N < M or N is not a power of two.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    # ``N < M is True`` is a chained comparison (N < M and M is True) and
    # could never fire; compare directly.
    if N < M:
        raise ValueError("'N' should be greather than 'M'")
    if np.log2(N) % 1 != 0:
        raise ValueError("Input not power of 2")
    x_ret = stft.stft(x, w, N, H)
    rest_x_H = x.shape[0] % H
    if rest_x_H != 0:
        delta_end = int((H - rest_x_H) // 2)
        print(delta_end)
        # Skip the trim when it is zero: x_ret[0:-0] is an empty array.
        if delta_end > 0:
            x_ret = x_ret[delta_end:-delta_end]
    print(x.shape, x_ret.shape, H, x.shape[0] % H)
    plt.figure(figsize=(8, 6), dpi=120)
    plt.subplot(2,1,1)
    plt.plot(x)
    plt.subplot(2,1,2)
    plt.plot(x_ret)
    return(x, x_ret)
Ejemplo n.º 43
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure how closely stft.stft reproduces a sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2): SNR1 is computed over the whole
            signal, SNR2 over the signal with M samples removed at each end.
    """
    (fs, x) = UF.wavread(inputFile)
    y = stft.stft(x, fs, get_window(window, M), N, H)
    residual = x - y
    # Interior of the signal: M samples dropped from both ends
    interior = slice(M, -M)
    full_snr = SNR(E(x), E(residual))
    interior_snr = SNR(E(x[interior]), E(residual[interior]))
    return (full_snr, interior_snr)
Ejemplo n.º 44
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure how closely STFT.stft reproduces a sound.

    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2): SNR1 is computed over the whole
            signal, SNR2 over the signal with M samples removed at each end.
    """
    def _snr_db(reference, estimate):
        # 10*log10 of reference energy over error energy
        error = reference - estimate
        signal_energy = np.sum(np.square(reference))
        error_energy = np.sum(np.square(error))
        return 10 * np.log10(signal_energy / error_energy)

    analysis_window = get_window(window, M)
    (fs, x) = UF.wavread(inputFile)
    y = STFT.stft(x, fs, analysis_window, N, H)

    whole = _snr_db(x, y)
    trimmed = _snr_db(x[M:len(x)-M], y[M:len(y)-M])
    return (whole, trimmed)
Ejemplo n.º 45
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure how closely stft.stft reproduces a sound.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2): SNR1 is computed over the whole
            signal, SNR2 over the signal with M samples removed at each end.
    """
    def _energy(sig):
        # Sum of squared magnitudes
        return np.sum(np.power(np.abs(sig), 2))

    (fs, x) = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    y = stft.stft(x, fs, analysis_window, N, H)

    snr_full = 10 * np.log10(_energy(x) / _energy(x - y))

    # Both slices use len(x) for the upper bound
    inner = slice(M, len(x) - M)
    x_inner = x[inner]
    y_inner = y[inner]
    snr_inner = 10 * np.log10(_energy(x_inner) / _energy(x_inner - y_inner))
    return (snr_full, snr_inner)
Ejemplo n.º 46
0
def compute_dynamic_features(filename):
    '''Compute dynamic features given Mel filterbank features.
    
    Each column (channel) of the Mel feature matrix is treated as a signal
    and transformed with stft.stft; the transposed transforms are stacked
    into one complex array of shape
    (nChannels, settings.FFT_SIZE // 2 + 1, timeSize).
    
    Argument :
        filename: filename of the file containing Mel filterbank features located in settings.DIR_MEL_FEATURES
            (without path, without npy extension)
   
    Returns: 0 if success
    
    The output file is located in settings.DIR_DYNAMIC_FEATURES.
    '''
    
    melFeatures = numpy.load(settings.DIR_MEL_FEATURES + filename + '.npy')
    
    # The unpacking also enforces that the features are two-dimensional.
    _nPoints, nChannels = melFeatures.shape
    if nChannels != 26:
        # Call form of print works on both Python 2 and 3.
        print("Warning : 26 channels expected")

    # Number of STFT frames, taken from the first channel
    timeSize = stft.stft_time_size(melFeatures[:,0], settings.FFT_SIZE, settings.OVERLAP)
    dynamicFeatures = numpy.zeros((nChannels, settings.FFT_SIZE//2+1, timeSize), dtype=complex)
    for i in range(nChannels):
        dynamicFeatures[i,:,:] = stft.stft(melFeatures[:,i], settings.FFT_SIZE, settings.OVERLAP).T
    
    numpy.save(settings.DIR_DYNAMIC_FEATURES + filename + '.npy', dynamicFeatures)
    
    return 0
Ejemplo n.º 47
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure how closely stft.stft reproduces a sound.

    Input:
            inputFile (string): wav file name including the path 
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming, 
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2): SNR1 is computed over the whole
            signal, SNR2 over the signal with M samples removed at each end.
    """
    def _ratio_db(sig, err):
        # Energies via dot products, converted to plain floats before dividing
        return 10 * np.log10(float(np.dot(sig, sig)) / float(np.dot(err, err)))

    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    y = stft.stft(x, fs, analysis_window, N, H)

    snr_whole = _ratio_db(x, x - y)

    x_core = x[M:-M]
    snr_core = _ratio_db(x_core, x_core - y[M:-M])
    return snr_whole, snr_core
Ejemplo n.º 48
0
import matplotlib.pyplot as plt
import numpy as np
from stft import stft
from read import read


# Plot the dB magnitude spectrogram of a mono recording.
fname = "doubleBass.wav"

(srate, data) = read(fname, "mono")
N = 1024
hop = N//2
win = "hann"
X= stft(data, N, hop, win)

X = np.abs(X)
#mag to dec conversion
X = 20*np.log10(X)
# Show only the first N//2 rows.  Floor division is required: N/2 is a
# float on Python 3 and cannot be used as a slice bound.
plt.imshow(X[:N//2, :], interpolation='nearest', aspect='auto', origin='lower')
plt.colorbar()
plt.title(str(fname) + ", N = " + str(N) + ", hop = N//2,  win = hann")
plt.show()

# remove a bit of signal at the end and time-align all signals.
# the delays were visually measured by plotting the signals
# np.ceil returns a float; slice bounds must be integers.
n_lim = int(np.ceil(len(input_mic) - t_cut*Fs))
input_clean = signal1[:n_lim]
input_mic = input_mic[105:n_lim+105]
output_mvdr = output_mvdr[31:n_lim+31]
output_maxsinr = output_maxsinr[31:n_lim+31]

# save all files for listening test
# NOTE(review): output_mvdr is written to 'output_maxsinr.wav' and
# output_maxsinr to 'output_rake-maxsinr.wav' -- confirm this is intended.
wavfile.write('output_samples/input_mic.wav', Fs, input_mic)
wavfile.write('output_samples/output_maxsinr.wav', Fs, output_mvdr)
wavfile.write('output_samples/output_rake-maxsinr.wav', Fs, output_maxsinr)

# compute time-frequency planes
F0 = stft(input_clean, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F1 = stft(input_mic, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F2 = stft(output_mvdr, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F3 = stft(output_maxsinr, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)

# (not so) fancy way to set the scale to avoid having the spectrum
# dominated by a few outliers
p_min = 7
p_max = 100
    def process(self, FD=False):
        """Apply the beamformer to ``self.signals`` and return the output.

        Args:
            FD: when exactly ``True`` the signals are processed in the STFT
                domain with ``self.weights``; otherwise they are convolved
                with ``self.filters`` and summed.

        Returns:
            The beamformed output signal.

        Raises:
            NameError: if there are no signals, or if neither weights nor
                filters have been computed.
        """

        if self.signals is None or len(self.signals) == 0:
            raise NameError('No signal to beamform')

        if FD is True:

            # STFT processing

            if self.weights is None and self.filters is not None:
                self.weightsFromFilters()
            elif self.weights is None and self.filters is None:
                raise NameError('Beamforming weights or filters need to be computed first.')

            # create window function, zero on the padded parts
            win = np.concatenate((np.zeros(self.zpf),
                                  windows.hann(self.L), 
                                  np.zeros(self.zpb)))

            def weighted_stft(i):
                # real STFT of signal i times its conjugated weights
                return stft.stft(self.signals[i],
                                 self.L,
                                 self.hop,
                                 zp_back=self.zpb,
                                 zp_front=self.zpf,
                                 transform=np.fft.rfft,
                                 win=win) * np.conj(self.weights[i])

            # accumulate the weighted time-frequency planes of all signals
            tfd_sig = weighted_stft(0)
            # range() works on both Python 2 and 3 (xrange is Python 2 only)
            for i in range(1, self.M):
                tfd_sig += weighted_stft(i)

            #  now reconstruct the signal
            output = stft.istft(
                tfd_sig,
                self.L,
                self.hop,
                zp_back=self.zpb,
                zp_front=self.zpf,
                transform=np.fft.irfft)

            # remove the zero padding from output signal
            # Compare by value: ``is 0`` is an identity test that is false
            # for e.g. numpy integers, and output[zpf:-0] would be empty.
            if self.zpb == 0:
                output = output[self.zpf:]
            else:
                output = output[self.zpf:-self.zpb]

        else:

            # TD processing

            if self.weights is not None and self.filters is None:
                self.filtersFromWeights()
            elif self.weights is None and self.filters is None:
                raise NameError('Beamforming weights or filters need to be computed first.')

            from scipy.signal import fftconvolve

            # filter every signal with its own filter and sum the results
            output = fftconvolve(self.filters[0], self.signals[0])
            for i in range(1, len(self.signals)):
                output += fftconvolve(self.filters[i], self.signals[i])


        return output
Ejemplo n.º 51
0
 def test_shape(self):
   """analyze() must return an array with the same shape as its STFT input."""
   x = scipy.sin(scipy.linspace(0,1,44100)*scipy.pi*2*440)
   X = stft(x, 1024)
   tracks = analyze(X)
   # The assertion is a TestCase method; a bare ``assertTrue`` name is not
   # defined in this scope.
   self.assertEqual(tracks.shape, X.shape)
Ejemplo n.º 52
0
 def test_number_of_tracks(self):
   """A single 440 Hz sine must produce exactly one non-zero track."""
   x = scipy.sin(scipy.linspace(0,1,44100)*scipy.pi*2*440)
   X = stft(x,1024)
   tracks = analyze(X)
   # The original lambda ("if y then x + 1 else x") is not valid Python;
   # count the non-zero entries directly instead.
   # NOTE(review): assumes iterating ``tracks`` yields scalars -- confirm
   # against what analyze() returns.
   nonzero_tracks = sum(1 for track in tracks if track != 0)
   self.assertEqual(1, nonzero_tracks)
Ejemplo n.º 53
0
import matplotlib.pyplot as plt
import numpy as np
from mfcc import mfcc 
from read import read
from stft import stft

# Plot the MFCC of a recording, computed from its magnitude spectrum.
fname = "sineSweep.wav"
(srate, data) = read(fname, "mono")
N = 1024
X= stft(data, N)
X = np.abs(X)
# Keep the first N//2 + 1 rows.  Floor division is required: N/2+1 is a
# float on Python 3 and cannot be used as a slice bound.
X = X[:N//2+1]
X = mfcc(X, 44100)
#mag to dec conversion
#X = 10 * np.log10(X)
# The first row of the MFCC output is left out of the plot
plt.imshow(X[1:], interpolation='nearest', aspect='auto', origin='lower')
plt.show()
Ejemplo n.º 54
0
Archivo: agc.py Proyecto: imclab/pyagc
def tf_agc(d, sr, t_scale=0.5, f_scale=1.0, causal_tracking=True, plot=False):
    """
    Perform frequency-dependent automatic gain control on an auditory
    frequency axis.

    d is the input waveform (at sampling rate sr).
    t_scale is the "scale" for smoothing in time (default 0.5 sec).
    f_scale is the frequency "scale" (default 1.0 "mel").
    causal_tracking true selects traditional infinite-attack, exponential
    release tracking; false selects symmetric, non-causal Gaussian-window
    smoothing.
    plot true additionally shows the spectrograms of the input, of the
    envelope and of the output (failures while plotting are only reported).

    Returns the tuple (y, D, E): y is the output waveform with approximately
    constant energy in each time-frequency patch, D is the actual STFT used
    in analysis, and E is the smoothed amplitude envelope divided out of D
    to get gain control.
    """

    hop_size = 0.032  # in seconds

    # Make STFT on ~32 ms grid
    ftlen = int(2 ** np.round(np.log(hop_size * sr) / np.log(2.)))
    winlen = ftlen
    # Floor division: the hop is used as a sample count.
    hoplen = winlen // 2
    D = stft(d, winlen, hoplen)  # using my code
    # Frame rate of the STFT; float() keeps this a true division on Python 2.
    ftsr = float(sr) / hoplen
    ndcols = D.shape[1]

    # Smooth in frequency on ~ mel resolution
    # Width of mel filters depends on how many you ask for,
    # so ask for fewer for larger f_scales
    nbands = max(10, 20 / f_scale)  # 10 bands, or more for very fine f_scale
    mwidth = f_scale * nbands / 10  # will be 2.0 for small f_scale
    (f2a_tmp, _) = fft2melmx(ftlen, sr, int(nbands), mwidth)
    f2a = f2a_tmp[:, :ftlen // 2 + 1]
    audgram = np.dot(f2a, np.abs(D))

    if causal_tracking:
        # traditional attack/decay smoothing
        fbg = np.zeros(audgram.shape)
        state = np.zeros(audgram.shape[0])
        alpha = np.exp(-(1. / ftsr) / t_scale)
        for i in range(audgram.shape[1]):
            # Jump up to a louder frame at once, decay by alpha otherwise
            state = np.maximum(alpha * state, audgram[:, i])
            fbg[:, i] = state

    else:
        # noncausal, time-symmetric smoothing
        # Smooth in time with tapered window of duration ~ t_scale
        tsd = np.round(t_scale * ftsr) / 2
        # Go out to 6 sigma; must be an int to be used as a slice bound
        # and as an array dimension.
        htlen = int(6 * tsd)
        twin = np.exp(-0.5 * (((np.arange(-htlen, htlen + 1)) / tsd) ** 2)).T

        # reflect ends to get smooth stuff
        AD = audgram
        x = np.hstack((np.fliplr(AD[:, :htlen]),
                       AD,
                       np.fliplr(AD[:, -htlen:]),
                       np.zeros((AD.shape[0], htlen))))
        fbg = signal.lfilter(twin, 1, x, 1)

        # strip "warm up" points
        fbg = fbg[:, twin.size + np.arange(ndcols)]

    # map back to FFT grid, flatten bark loop gain
    sf2a = np.sum(f2a, 0)
    sf2a_fix = sf2a
    sf2a_fix[sf2a == 0] = 1.
    E = np.dot(np.dot(np.diag(1. / sf2a_fix), f2a.T), fbg)
    # Remove any zeros in E (shouldn't be any, but who knows?)
    E[E <= 0] = np.min(E[E > 0])

    # invert back to waveform
    y = istft(D / E, winlen, hoplen, window=np.ones(winlen))  # using my code

    if plot:
        # Plotting is best effort: a failure is reported, not raised.
        try:
            import matplotlib.pyplot as plt
            plt.subplot(3, 1, 1)
            plt.imshow(20. * np.log10(np.flipud(np.abs(D))))
            plt.subplot(3, 1, 2)
            plt.imshow(20. * np.log10(np.flipud(np.abs(E))))
            A = stft(y, winlen, hoplen)  # using my code
            plt.subplot(3, 1, 3)
            plt.imshow(20. * np.log10(np.flipud(np.abs(A))))
            plt.show()
        except Exception as e:
            print("Failed to plot results")
            print(e)

    # Hand the results back: they were computed but never returned.
    return y, D, E
Ejemplo n.º 55
0
import math
from scipy.signal import get_window
import matplotlib.pyplot as plt

# Script: measure how closely stft.stft reproduces a sound, as two
# signal-to-noise ratios in dB (whole signal, and signal without its edges).

# params
inputFile = '../../sounds/sax-phrase-short.wav'
window    = 'hamming'
M = 512
N = 1024
H = 64

# Make the modules in ../../software/models/ (relative to this file)
# importable; the two imports below depend on this path entry.
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/'))
import stft
import utilFunctions as UF
# NOTE(review): eps is not used anywhere in the visible script.
eps = np.finfo(float).eps

fs, x = UF.wavread(inputFile)
w = get_window(window, M)
y = stft.stft(x, fs, w, N, H)

# Error between the input and the output of stft.stft
noise = x - y

# Energies over the whole signal
E_x = np.sum( abs(x)**2 )
E_noise = np.sum( abs(noise)**2 )

# Energies with M samples dropped at each end
E_partial_x = np.sum( abs(x[ M : x.size - M ])**2 )
E_partial_noise = np.sum( abs(noise[ M : noise.size - M ])**2 )

# NOTE(review): a zero noise energy makes these ratios divide by zero.
SNR1 = 10 * np.log10( E_x / E_noise )
SNR2 = 10 * np.log10( E_partial_x  / E_partial_noise )
Ejemplo n.º 56
0
import numpy as np
import matplotlib.pyplot as plt
from stft import stft

if __name__ == "__main__":
    # Demo: plot a 100 Hz sine wave, then plot the magnitude of two of the
    # frames that stft() returns for it.
    fr = 44100  # framerate
    time = np.arange(0, 5, 1.0/fr)  # sample instants covering 0 to 5

    # Generate 100 Hz sin wave.
    # np.sin() works in radians, so a frequency given in Hz has to be
    # multiplied by 2*pi; sin(100*time) would only be about 15.9 Hz.
    sig = np.sin(2 * np.pi * 100 * time)
    plt.plot(time, sig)
    plt.axis([0, 5, -2, 2])
    plt.show()

    # Generate windows
    windows = stft(sig)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(len(windows))
    # Magnitudes of two consecutive entries of the stft() result.
    plt.plot(np.abs(windows[8]))
    plt.plot(np.abs(windows[9]))
    plt.show()

Ejemplo n.º 57
0
    def process(self):
        """Apply the beamforming weights to the signals and return the result.

        The algorithm is selected by ``self.processing``:

        * ``'FrequencyDomain'`` -- multiply the real STFT of every signal by
          the conjugate of its weights, sum over the signals and invert the
          STFT; the zero padding is then cut off the output.
        * ``'TimeDomain'`` -- turn the conjugated weights into time-domain
          filters with an inverse real FFT and convolve each filter with its
          signal.
        * ``'Total'`` -- multiply one full-length FFT of every signal by the
          conjugate of its (Hermitian-extended) weights and invert the sum.
          This branch also prints the mean absolute imaginary and real parts
          of the inverse transform.

        Returns:
            The beamformed signal computed by the selected branch.

        Raises:
            NameError: if ``self.signals`` is ``None`` or empty.
            ValueError: if ``self.processing`` is not one of the modes above.
        """

        if self.signals is None or len(self.signals) == 0:
            raise NameError('No signal to beamform')

        # Modes are compared with ``==``: ``is`` tests object identity, which
        # is not guaranteed for equal strings (or equal ints such as zpb).
        if self.processing == 'FrequencyDomain':

            # create window function
            win = np.concatenate((np.zeros(self.zpf),
                                  windows.hann(self.L),
                                  np.zeros(self.zpb)))

            def weighted_stft(idx):
                # real STFT of one signal, scaled by its conjugated weights
                return stft.stft(self.signals[idx],
                                 self.L,
                                 self.hop,
                                 zp_back=self.zpb,
                                 zp_front=self.zpf,
                                 transform=np.fft.rfft,
                                 win=win) * np.conj(self.weights[idx])

            # accumulate the weighted STFTs of all signals
            tfd_sig = weighted_stft(0)
            for i in range(1, self.M):
                tfd_sig += weighted_stft(i)

            #  now reconstruct the signal
            output = stft.istft(
                tfd_sig,
                self.L,
                self.hop,
                zp_back=self.zpb,
                zp_front=self.zpf,
                transform=np.fft.irfft)

            # remove the zero padding from output signal
            # (a ``-0`` stop index would yield an empty slice, hence the
            # special case for no back padding)
            if self.zpb == 0:
                output = output[self.zpf:]
            else:
                output = output[self.zpf:-self.zpb]

        elif self.processing == 'TimeDomain':

            # floor division keeps the index an int on Python 2 and 3 alike
            half = self.N // 2

            # go back to time domain and shift DC to center
            tw = np.sqrt(self.weights.shape[1])*np.fft.irfft(np.conj(self.weights), axis=1)
            tw = np.concatenate((tw[:, half:], tw[:, :half]), axis=1)

            from scipy.signal import fftconvolve

            # filter every signal with its time-domain weights and sum
            output = fftconvolve(tw[0], self.signals[0])
            for i in range(1, len(self.signals)):
                output += fftconvolve(tw[i], self.signals[i])

        elif self.processing == 'Total':

            half = self.N // 2

            # extend the weights with their mirrored conjugate and force the
            # first and middle bins to be real
            W = np.concatenate((self.weights, np.conj(self.weights[:,-2:0:-1])), axis=1)
            W[:,0] = np.real(W[:,0])
            W[:,half] = np.real(W[:,half])

            F_sig = np.zeros(self.signals.shape[1], dtype=complex)
            for i in range(self.M):
                F_sig += np.fft.fft(self.signals[i])*np.conj(W[i,:])

            f_sig = np.fft.ifft(F_sig)
            # diagnostics: mean magnitude of the imaginary and real parts
            print(np.abs(np.imag(f_sig)).mean())
            print(np.abs(np.real(f_sig)).mean())

            # reuse the inverse transform computed above
            output = np.real(f_sig)

        else:
            # Previously an unknown mode fell through to ``return output``
            # with ``output`` unbound; fail with an explicit message instead.
            raise ValueError('Unknown processing mode: %r' % (self.processing,))

        return output