Example #1
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure how much error an STFT analysis/synthesis round trip introduces.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2), both in dB.
            SNR1 uses the reconstruction error over the whole signal.
            SNR2 uses the error left after dropping one window length of samples at each end.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Round trip through the STFT; the resynthesis is cut to the input length
    # so both signals can be subtracted sample by sample.
    mX, pX = STFT.stftAnal(x, fs, w, N, H)
    resynth = STFT.stftSynth(mX, pX, M, H)[:x.size]

    edge = w.size
    err_full = x - resynth
    err_core = x[edge:-edge] - resynth[edge:-edge]

    def linear_energy(mag_db):
        # dB magnitudes -> linear amplitude -> summed squared amplitude
        return np.sum((10 ** (mag_db / 20)) ** 2)

    e_signal = linear_energy(mX)
    ratios_db = []
    for err in (err_full, err_core):
        m_err, _ = STFT.stftAnal(err, fs, w, N, H)
        ratios_db.append(10 * np.log10(e_signal / linear_energy(m_err)))

    return ratios_db[0], ratios_db[1]
def computeSNR(inputFile, window, M, N, H):
    """
    Compute two signal-to-noise ratios for an STFT analysis followed by resynthesis.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2) in dB. Energies are measured on the dB magnitude
            spectrograms: the input's spectrogram against the spectrogram of the residual
            (SNR1: full residual, SNR2: residual with w.size samples removed at both ends).
    """
    def spectrogram_energy(mag_db):
        amplitude = np.power(10, mag_db / 20)
        return np.sum(np.square(amplitude))

    sample_rate, signal = UF.wavread(inputFile)
    win = get_window(window, M)
    trim = win.size

    mag, phase = STFT.stftAnal(signal, sample_rate, win, N, H)
    rebuilt = STFT.stftSynth(mag, phase, M, H)
    aligned = rebuilt[:signal.size]

    residual_all = signal - aligned
    residual_mid = signal[trim:-trim] - aligned[trim:-trim]

    # Only the magnitude spectrograms of the residuals are needed.
    mag_res_all = STFT.stftAnal(residual_all, sample_rate, win, N, H)[0]
    mag_res_mid = STFT.stftAnal(residual_mid, sample_rate, win, N, H)[0]

    snr1 = 10 * np.log10(spectrogram_energy(mag) / spectrogram_energy(mag_res_all))
    snr2 = 10 * np.log10(spectrogram_energy(mag) / spectrogram_energy(mag_res_mid))
    return snr1, snr2
Example #3
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    # Band edges in Hz (the original referenced these through undefined names).
    f_split = 3000.0
    f_top = 10000.0

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    xmX, _ = stft.stftAnal(x, fs, w, N, H)

    # Bin k sits at k*fs/N Hz, so a frequency f maps to the (fractional) bin N*f/fs.
    # ceil()-1 is the last bin strictly below f and floor()+1 the first bin strictly
    # above it; both stay well defined when N*f/fs happens to be an integer.
    split_pos = N * f_split / float(fs)
    top_pos = N * f_top / float(fs)
    k_low_last = int(np.ceil(split_pos)) - 1
    k_high_first = int(np.floor(split_pos)) + 1
    k_high_last = int(np.ceil(top_pos)) - 1

    # dB magnitude -> linear amplitude -> power, for all frames at once.
    power = (10 ** (xmX / 20.0)) ** 2

    out = np.empty((xmX.shape[0], 2))
    # Bin 0 (DC) is excluded because the low band is 0 < f.
    out[:, 0] = 10.0 * np.log10(np.sum(power[:, 1:k_low_last + 1], axis=1))
    out[:, 1] = 10.0 * np.log10(np.sum(power[:, k_high_first:k_high_last + 1], axis=1))
    return out
Example #4
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Return the frame-by-frame energy (in dB) of a sound in a low and a high band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames
            engEnv[:,0]: energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    w = get_window(window, M, False)
    fs, x = UF.wavread(inputFile)
    mag_db, _ = stft.stftAnal(x, w, N, H)
    mag_lin = 10**(mag_db / 20)

    # Exclusive stop indices of the two bands (one past the last bin at or below the edge).
    stop_3k = int(np.floor(3000 * N / fs)) + 1
    stop_10k = int(np.floor(10000 * N / fs)) + 1

    def band_db(first, stop):
        # Sum of squared linear magnitudes per frame, expressed in dB.
        band = mag_lin[:, first:stop]
        return 10 * np.log10(np.sum(band * band, axis=1))

    # The low band starts at bin 1, skipping the DC bin.
    low_env = band_db(1, stop_3k)
    high_env = band_db(stop_3k, stop_10k)
    return np.array([low_env, high_env]).T
Example #5
0
def sineODF(file='../../../../../audioDSP_course/assignments/sms-tools/sounds/piano.wav'):
    """
    Run the sinusoidal model on a sound and draw the tracks over its spectrogram.

    Input:
            file (string): wav file name including the path
    Output:
            fTrackEst: the frequency tracks returned by SM.sineModelAnal
    The figure is drawn on the current matplotlib axes; plt.show() is not called here.
    """
    fs, x = UF.wavread(file)

    # analysis parameters
    M = 1024                                # window size
    H = int(M/3)                            # hop size
    t = -80.0                               # threshold passed to the sine model
    window = 'blackman'                     # window type
    N = int(pow(2, np.ceil(np.log2(M))))    # FFT size: next power of two >= M
    maxnSines = 10                          # maximum simultaneous sines
    minSineDur = 0.1                        # minimal duration of sines
    freqDevOffset = 30                      # frequency deviation at 0Hz
    freqDevSlope = 0.001                    # slope increase of the frequency deviation

    w = get_window(window, M)
    tStamps = genTimeStamps(len(x), M, fs, H)
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # plotting: spectrogram up to maxplotfreq with the estimated tracks on top
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 1500.0
    # Slice bounds must be integers; the same count sizes the frequency axis so
    # the axis and the plotted rows always agree.
    nPlotBins = int(N*maxplotfreq/fs) + 1
    binFreq = fs*np.arange(nPlotBins)/float(N)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :nPlotBins]), cmap='hot_r')
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return fTrackEst
Example #6
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Energy envelope (dB) per STFT frame for a low and a high frequency band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames
            engEnv[:,0]: energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    # First bin at or above each band edge; used as exclusive slice stops.
    split_bin = int(np.ceil(float(3000) * N / fs))
    top_bin = int(np.ceil(float(10000) * N / fs))
    w = get_window(window, M)

    mX, _ = stft.stftAnal(x, w, N, H)

    # (start, stop) bin ranges; the low band starts at 1 to leave out the DC bin.
    band_ranges = ((1, split_bin), (split_bin, top_bin))
    envelopes = [
        10 * np.log10(np.sum((10**(mX[:, lo:hi] / 20))**2, axis=1))
        for lo, hi in band_ranges
    ]

    # Stack as rows (one per band), then flip to one row per frame.
    return np.transpose(np.array(envelopes))
Example #7
0
def plotSpectogramF0Segments(x, fs, w, N, H, f0, segments):
    """
    Plot the f0 contour and the given segments on top of the spectrogram of x.

    Inputs:
            x: input signal
            fs: sampling rate
            w: analysis window
            N: FFT size
            H: hop size
            f0: f0 value per frame (plotted against the frame times)
            segments: 2-D array; columns 0 and 1 of each row are used as the start and stop
                frame index of a highlighted stretch of f0
    The figure is shown with plt.show(); nothing is returned.
    """
    max_plot_freq = 1000.0      # upper frequency limit of the plot, in Hz
    label_size = 16

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Same STFT parameters as the ones used for the analysis.
    mX, _ = stft.stftAnal(x, fs, w, N, H)
    n_bins = int(N*(max_plot_freq/fs)) + 1
    spec = np.transpose(mX[:, :n_bins])

    frame_times = np.arange(spec.shape[1])*H/float(fs)
    bin_freqs = np.arange(spec.shape[0])*fs/float(N)

    plt.pcolormesh(frame_times, bin_freqs, spec)
    plt.plot(frame_times, f0, color='k', linewidth=5)

    for row in range(segments.shape[0]):
        span = slice(segments[row, 0], segments[row, 1])
        plt.plot(frame_times[span], f0[span], color='#A9E2F3', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize=label_size)
    plt.xlabel('Time (s)', fontsize=label_size)
    plt.legend(('f0', 'segments'))

    # Set the aspect so the plot is drawn twice as wide as it is tall.
    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()
    ax.set_aspect((x_hi - x_lo)/(2.0*(y_hi - y_lo)))
    plt.autoscale(tight=True)
    plt.show()
Example #8
0
def plotSpectogramF0Segments(x, fs, w, N, H, f0, segments):
    """
    Overlay an f0 contour and its segments on the spectrogram of a signal.

    Inputs:
            x: input signal
            fs: sampling rate
            w: analysis window
            N: FFT size
            H: hop size
            f0: f0 value per frame
            segments: 2-D array whose first two columns give, per row, the start and stop
                frame index of a segment to highlight
    Shows the figure; returns nothing.
    """
    top_freq = 1000.0   # frequency range to plot
    font_pt = 16

    figure = plt.figure()
    axes = figure.add_subplot(111)

    # using same params as used for analysis
    magnitudes = stft.stftAnal(x, w, N, H)[0]
    shown = np.transpose(magnitudes[:, :int(N*(top_freq/fs))+1])
    n_rows, n_cols = shown.shape[0], shown.shape[1]

    t_axis = np.arange(n_cols)*H/float(fs)
    f_axis = np.arange(n_rows)*fs/float(N)

    plt.pcolormesh(t_axis, f_axis, shown)
    plt.plot(t_axis, f0, color='k', linewidth=5)

    for begin, end in zip(segments[:, 0], segments[:, 1]):
        plt.plot(t_axis[begin:end], f0[begin:end], color='#A9E2F3', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize=font_pt)
    plt.xlabel('Time (s)', fontsize=font_pt)
    plt.legend(('f0', 'segments'))

    x_range = axes.get_xlim()
    y_range = axes.get_ylim()
    width = x_range[1] - x_range[0]
    height = y_range[1] - y_range[0]
    axes.set_aspect(width/(2.0*height))
    plt.autoscale(tight=True)
    plt.show()
Example #9
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the signal-to-noise ratio of an STFT analysis/synthesis round trip.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.
            SNR1 is measured over the whole signal, SNR2 after discarding M samples at
            both the start and the end.
    """
    def energy(sig):
        return np.sum(np.abs(sig)**2)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M, False)

    mX, pX = stft.stftAnal(x, w, N, H)
    # Cut the resynthesis to the input length before comparing sample by sample.
    y = stft.stftSynth(mX, pX, M, H)[:x.size]
    noise = x - y

    # The reference signal is the input x (not the resynthesis), and signal and
    # noise energies are always taken over the same span of samples.
    SNR1 = 10 * np.log10(energy(x) / energy(noise))
    SNR2 = 10 * np.log10(energy(x[M:-M]) / energy(noise[M:-M]))

    return SNR1, SNR2
Example #10
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an onset detection function (ODF) for a low and a high frequency band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            numpy array with two columns
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    The per-band ODF itself is produced by the external helper odf(), which receives the
    linear-magnitude spectrogram restricted to the band.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mag_db, _ = stft.stftAnal(x, fs, w, N, H)
    mag_lin = 10 ** (mag_db / 20.0)

    # Last bin at or below each band edge.
    last_3k = int(N*3000.0/fs)
    last_10k = int(N*10000.0/fs)

    # Bin 0 is left out of the low band; the high band starts right after the low one.
    low_band = mag_lin[:, 1:last_3k+1]
    high_band = mag_lin[:, last_3k+1:last_10k+1]

    low_odf = odf(low_band)
    high_odf = odf(high_band)
    return np.column_stack((low_odf, high_odf))
Example #11
0
def computeSNR(inputFile, window, M, N, H):
    """
    Signal-to-noise ratio between a sound and its STFT resynthesis.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            python tuple (SNR1, SNR2) in dB
            SNR1: computed over the whole signal
            SNR2: computed after dropping M samples at the start and at the end
    """
    def snr_db(sig, err):
        # ratio of summed squared magnitudes, in dB
        sig_mag = abs(sig)
        err_mag = abs(err)
        return 10*np.log10(np.sum(sig_mag*sig_mag)/np.sum(err_mag*err_mag))

    w = get_window(window, M, False)
    (fs, x) = UF.wavread(inputFile)

    (mX, pX) = stft.stftAnal(x, w, N, H)
    y = stft.stftSynth(mX, pX, M, H)
    # reconstruction error, with the resynthesis cut to the input length
    err = x - y[:x.size]

    return (snr_db(x, err), snr_db(x[M:-M], err[M:-M]))
Example #12
0
def main(inputFile, window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Analyse a sound with the harmonic plus residual model and write the results to disk.

    inputFile: input sound file (wav)
    window: analysis window type, passed to get_window
    M: analysis window size
    N: fft size used in the analysis
    t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope: harmonic analysis settings,
        passed unchanged to HPR.hprModelAnal

    Three files are written under output_sounds/, named after the input file:
    <name>_hprModel_sines.wav, <name>_hprModel_residual.wav and <name>_hprModel.wav.
    Nothing is returned.
    """
    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # Strip the extension whatever its length instead of assuming four characters.
    stem = os.path.splitext(os.path.basename(inputFile))[0]
    outputPrefix = 'output_sounds/' + stem + '_hprModel'

    # write sounds files for harmonics, residual, and the sum
    UF.wavwrite(yh, fs, outputPrefix + '_sines.wav')
    UF.wavwrite(xr, fs, outputPrefix + '_residual.wav')
    UF.wavwrite(y, fs, outputPrefix + '.wav')
Example #13
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Per-frame energy envelope of a sound in two bands, in dB.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames
            engEnv[:,0]: energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M, False)
    xmX, xpX = stft.stftAnal(x, w, N, H)

    def frame_envelope(frame_db):
        # Energies of one frame: bins outside a band are zeroed before summing.
        linear = pow(10, frame_db / 20)
        hz = np.arange(linear.size) * fs / N
        in_low = (hz > 0) & (hz < 3000)
        in_high = (hz > 3000) & (hz < 10000)
        e_low = np.sum(np.square(abs(np.where(in_low, linear, 0))))
        e_high = np.sum(np.square(abs(np.where(in_high, linear, 0))))
        return [10 * np.log10(e_low), 10 * np.log10(e_high)]

    return np.asarray([frame_envelope(frame_db) for frame_db in xmX])
Example #14
0
def computeSNR(inputFile, window, M, N, H):
    """
    Compute the signal-to-noise ratio of an STFT analysis/synthesis round trip.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.
            SNR1 is measured over the whole signal, SNR2 after discarding M samples at
            both the start and the end.
    """
    def calculate_energy(sig):
        return np.sum(sig**2)

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    # Cut the resynthesis to the input length before subtracting.
    y = stft.stftSynth(mX, pX, M, H)[:x.size]
    noise = x - y

    SNR1 = 10*np.log10(calculate_energy(x)/calculate_energy(noise))
    # Signal and noise are trimmed identically so both energies cover the same
    # samples; using the full-signal energy here would overstate SNR2.
    SNR2 = 10*np.log10(calculate_energy(x[M:-M])/calculate_energy(noise[M:-M]))

    return (SNR1, SNR2)
Example #15
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Energy envelope per STFT frame for a low and a high frequency band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames
            engEnv[:,0]: energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: energy envelope in band 3000 < f < 10000 Hz (in dB)
    The band energies are computed by the external helper bandE(), called with the linear
    magnitude spectrogram and a pair of bin indices.
    NOTE(review): whether bandE treats the second index as inclusive is not visible here.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    mX, pX = stft.stftAnal(x, w, N, H)
    linear = 10**(mX / 20)

    # Frequency in Hz of each of the N FFT bins.
    frame_dur = float(N) / fs
    hz = np.arange(N) / frame_dur

    split_hz = 3000
    top_hz = 10000
    # argmin over a boolean mask gives the first False, argmax the first True.
    low_stop = np.argmin(hz < split_hz)     # first bin at or above 3000 Hz
    high_start = np.argmax(hz > split_hz)   # first bin above 3000 Hz
    high_stop = np.argmin(hz < top_hz)      # first bin at or above 10000 Hz

    low_env = bandE(linear, 1, low_stop)
    high_env = bandE(linear, high_start, high_stop)
    return np.vstack((low_env, high_env)).T
Example #16
0
def compute_eng_env(inputFile, window, M, N, H):
    """
    Compute and plot the energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file
            window (string): analysis window type, passed to get_window
            M (integer): analysis window size
            N (integer): FFT size
            H (integer): hop size for the stft computation
    Output:
            tuple (fs, mX, env)
            fs: sampling rate read from the file
            mX: magnitude spectrogram returned by stft.stftAnal
            env: array with one row per frame; column 0 is the energy (dB) of the bins with
                0 < f < 3000 Hz, column 1 of the bins with 3000 < f < 10000 Hz
    Before returning, the result is handed to plot_spectrogram_with_energy_envelope().
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    linear = 10.0**(mX / 20.0)

    # Indices of the bins whose frequency falls inside each band.
    hz = np.arange(0, N) * fs / N
    low_idx = np.where((hz > 0) & (hz < 3000.0))[0]
    high_idx = np.where((hz > 3000) & (hz < 10000.0))[0]

    env = np.zeros(shape=(mX.shape[0], 2))
    for k, frame in enumerate(linear):
        low_energy = sum(frame[low_idx]**2)
        high_energy = sum(frame[high_idx]**2)
        env[k, 0] = 10.0 * np.log10(low_energy)
        env[k, 1] = 10.0 * np.log10(high_energy)

    title = 'mX ({}), M={}, N={}, H={}'.format(inputFile, M, N, H)
    plot_spectrogram_with_energy_envelope(mX, env, M, N, H, fs, title)

    return fs, mX, env
Example #17
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    mX, pX = stft.stftAnal(x, w, N, H)

    # dB magnitude -> linear amplitude -> power, for every frame and bin.
    power = (10**(mX / 20.)) ** 2

    # The bin frequencies are the same for every frame, so the band masks are
    # built once. Both bounds are strict, as documented: a bin exactly on
    # 3000 Hz belongs to neither band.
    hz = np.arange(power.shape[1]) * fs / float(N)
    in_low = (hz > 0) & (hz < 3000)
    in_high = (hz > 3000) & (hz < 10000)

    engEnv = np.empty((power.shape[0], 2))
    engEnv[:, 0] = 10 * np.log10(np.sum(power[:, in_low], axis=1))
    engEnv[:, 1] = 10 * np.log10(np.sum(power[:, in_high], axis=1))
    return engEnv
Example #18
0
def main(inputFile='../../sounds/piano.wav', window='hamming', M=1024, N=1024, H=512):
    """
    Analyse a sound with the STFT, resynthesise it and write the result to disk.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size

    The resynthesis is written to output_sounds/<input name without its last 4 characters>_stft.wav.
    Returns the tuple (x, fs, mX, pX, y): input samples, sampling rate, magnitude and phase
    spectrograms, and the resynthesised sound.
    """
    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # forward STFT followed by the inverse STFT
    mX, pX = STFT.stftAnal(x, fs, w, N, H)
    y = STFT.stftSynth(mX, pX, M, H)

    # name the output after the input, dropping the 4-character extension
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_stft.wav'

    UF.wavwrite(y, fs, outputFile)
    return x, fs, mX, pX, y
Example #19
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    power = pow(10, mX/20.) ** 2

    # Bin frequencies come from the sampling rate of the file actually read,
    # not from an assumed 44100 Hz; float() keeps the division exact.
    bin_hz = np.arange(power.shape[1]) * float(fs) / N
    low_band = (bin_hz > 0) & (bin_hz < 3000)
    high_band = (bin_hz > 3000) & (bin_hz < 10000)

    # One column per band, one row per frame.
    band_energy = np.column_stack((
        np.sum(power[:, low_band], axis=1),
        np.sum(power[:, high_band], axis=1),
    ))
    return 10.0*np.log10(band_energy)
def computeODF(inputFile, window, M, N, H):
    """
    Compute an energy-based onset detection function (ODF) in two frequency bands.

    The ODF of a frame is the increase of the band energy (in dB) with respect to the
    previous frame; decreases are clipped to zero. The first frame has no predecessor,
    so its ODF is 0.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            The function should return a numpy array with two columns, where the first column is the ODF
            computed on the low frequency band and the second column is the ODF computed on the high
            frequency band.
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """
    windowing = get_window(window, M)

    (fs, x) = UF.wavread(inputFile)

    mX, pX = stft.stftAnal(x, fs, windowing, N, H)

    # Slice bounds must be integers. ceil()-1 is the last bin strictly below an
    # edge and floor()+1 the first bin strictly above it, so a bin exactly on
    # 3000 Hz is not counted in both bands.
    pos3000 = 3000.0*N/fs
    pos10000 = 10000.0*N/fs
    lowLast = int(np.ceil(pos3000)) - 1
    highFirst = int(np.floor(pos3000)) + 1
    highLast = int(np.ceil(pos10000)) - 1

    # Band energy in dB for every frame; bin 0 (DC) is excluded from the low band.
    power = np.square(10**(mX / 20))
    envDb = np.column_stack((
        10 * np.log10(np.sum(power[:, 1:lowLast+1], axis=1)),
        10 * np.log10(np.sum(power[:, highFirst:highLast+1], axis=1)),
    ))

    # Half-wave rectified frame-to-frame difference; row 0 stays at zero.
    ODF = np.zeros(envDb.shape)
    ODF[1:] = np.maximum(envDb[1:] - envDb[:-1], 0.0)

    return ODF
Example #21
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute, print diagnostics for, and plot the energy envelope of a sound in two bands.

    Inputs:
        inputFile (string): input sound file (monophonic with sampling rate of 44100)
        window (string): analysis window type (choice of rectangular, triangular, hanning,
            hamming, blackman, blackmanharris)
        M (integer): analysis window size (odd positive integer)
        N (integer): FFT size (power of 2, such that N > M)
        H (integer): hop size for the stft computation
    Output:
        numpy array engEnv with shape Kx2, K = number of frames
        engEnv[:,0]: energy envelope in band 0 < f < 3000 Hz (in dB)
        engEnv[:,1]: energy envelope in band 3000 < f < 10000 Hz (in dB)
    Side effects: prints the two band-edge bin indices and the shape of the high band,
    and shows a figure with the spectrogram and both envelopes.
    """
    def band_db(band):
        # Energy per frame in dB, kept as a column so the bands can be joined side by side.
        return 10 * np.log10(np.sum(band**2, axis=1, keepdims=True))

    # STFT of the input; the third get_window argument is True only for even M.
    w = get_window(window, M, M % 2 == 0)
    fs, x = UF.wavread(inputFile)
    mX, pX = stft.stftAnal(x, w, N, H)

    # Last bin at or below 3000 Hz and 10000 Hz.
    k_split = int(3000 * N / fs)
    k_top = int(10000 * N / fs)
    print("low freq ", k_split)
    print("high freq ", k_top)

    linear = 10**(mX / 20)

    # Low band skips bin 0; high band starts right after the low band.
    low_band = linear[:, 1:k_split + 1]
    env_low = band_db(low_band)

    high_band = linear[:, k_split + 1:k_top + 1]
    env_high = band_db(high_band)
    print(high_band.shape)

    # Top panel: spectrogram.
    plt.figure(1, figsize=(9.5, 6))
    plt.subplot(211)
    n_frames = int(mX[:, 0].size)
    frame_times = H * np.arange(n_frames) / float(fs)
    bin_freqs = np.arange(N / 2 + 1) * float(fs) / N
    plt.pcolormesh(frame_times, bin_freqs, np.transpose(mX))
    plt.title('mX, M={}, N={}, H={}'.format(M, N, H))
    plt.autoscale(tight=True)

    # Bottom panel: the two envelopes.
    plt.subplot(212)
    plt.plot(frame_times, env_low, label="low freq")
    plt.plot(frame_times, env_high, label="high freq")

    plt.tight_layout()
    plt.legend()
    plt.show()

    return np.concatenate([env_low, env_high], axis=1)
Example #22
0
def chirpTracker(inputFile='../../sounds/chirp-150-190-linear.wav'):
    """
    Track the two components of a chirp with the sinusoidal model, report the mean
    estimation error and plot estimated against true frequency tracks.

    Input:
           inputFile (string) = wav file including the path
    Output:
           M (int) = Window length
           H (int) = hop size in samples
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst (numpy array) = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue (numpy array) = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
           K is the number of frames
    """
    # Analysis parameters
    M = 3300  # Window size in samples

    H = 128  # Hop size in samples
    N = int(pow(2, np.ceil(np.log2(M))))  # FFT Size, power of 2 larger than M
    t = -80.0  # threshold
    window = 'blackman'  # Window type
    maxnSines = 2  # Maximum number of sinusoids at any time frame
    minSineDur = 0.0  # minimum duration set to zero to not do tracking
    freqDevOffset = 30  # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001  # slope increase of minimum frequency deviation

    fs, x = UF.wavread(inputFile)  # read input sound
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t,
                                                       maxnSines, minSineDur,
                                                       freqDevOffset,
                                                       freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)  # Generate the true frequency tracks
    tailF = 20
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] -
                             fTrackEst[tailF:-tailF, :]),
                      axis=0)
    # Single-argument call form: valid as a statement in Python 2 and as a
    # function call in Python 3.
    print("Mean estimation error = " + str(meanErr) + ' Hz')

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    # Slice bounds must be integers; the same count sizes the frequency axis so
    # the axis and the plotted rows always agree.
    nPlotBins = int(N * maxplotfreq / fs) + 1
    binFreq = fs * np.arange(nPlotBins) / float(N)
    plt.pcolormesh(tStamps,
                   binFreq,
                   np.transpose(mX[:, :nPlotBins]),
                   cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return M, H, tStamps, fTrackEst, fTrackTrue  # Output returned
Example #23
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure the signal-to-noise ratio of an STFT analysis/synthesis round trip.

    The sound is analysed with the STFT and resynthesised; the "noise" is the
    difference between the input and the resynthesis trimmed to the input length.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            A python tuple (SNR1, SNR2), both in dB.
            SNR1 is computed from the noise over the whole signal.
            SNR2 is computed from the noise with w.size samples dropped at each end.
    """
    analysis_win = get_window(window, M)
    fs, x = UF.wavread(inputFile)

    # Analysis followed by synthesis; the output is cut to the input length so
    # that the two signals can be subtracted sample by sample.
    mag_in, phase_in = stft.stftAnal(x, analysis_win, N, H)
    rebuilt = stft.stftSynth(mag_in, phase_in, M, H)[:x.size]

    # Residual over the full signal, and with one window length removed per edge.
    edge = analysis_win.size
    residuals = (x - rebuilt, x[edge:-edge] - rebuilt[edge:-edge])

    # Magnitude spectra of both residuals (phases are not needed).
    residual_mags = [
        stft.stftAnal(res, analysis_win, N, H)[0] for res in residuals
    ]

    # energy_computation is defined elsewhere in this file.
    signal_energy, noise_energy_full, noise_energy_inner = [
        energy_computation(mag) for mag in [mag_in] + residual_mags
    ]

    # SNR = 10 * log10(energy of signal / energy of noise)
    return (10 * np.log10(signal_energy / noise_energy_full),
            10 * np.log10(signal_energy / noise_energy_inner))
Example #24
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute and plot the per-frame energy envelopes of a sound in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            A numpy array of shape Kx2 (K = number of frames) with the energy
            envelope of the signal in decibels (dB):
            column 0: band 0 < f < 3000 Hz
            column 1: band 3000 < f < 10000 Hz
    Side effect:
            Opens a matplotlib figure with the spectrogram and both envelopes.
    """
    def band_energy_db(mag_db):
        # dB magnitude -> linear magnitude -> summed squared magnitude per frame -> dB
        linear = 10 ** (mag_db / 20)
        return 10 * np.log10(np.sum(linear ** 2, axis=1))

    fs, x = UF.wavread(inputFile)

    # First bin at or above each band edge; used as exclusive slice ends.
    bin_3k = int(np.ceil(float(3000) * N / fs))
    bin_10k = int(np.ceil(float(10000) * N / fs))
    w = get_window(window, M)

    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # Bin 0 (DC) is left out of the low band.
    e_low = band_energy_db(mX[:, 1:bin_3k])
    e_high = band_energy_db(mX[:, bin_3k:bin_10k])

    envs = np.transpose(np.array([e_low, e_high]))

    # Axes for the plots: frame times in seconds and bin frequencies in Hz.
    frmTime = H * np.arange(mX.shape[0]) / float(fs)
    binFreq = np.arange(mX.shape[1]) * float(fs) / N

    plt.figure(1, figsize=(9.5, 6))

    # Top panel: magnitude spectrogram.
    plt.subplot(211)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
    plt.title('mX ({0}), M={1}, N={2}, H={3}'.format(inputFile, M, N, H))
    plt.autoscale(tight=True)

    # Bottom panel: the two energy envelopes.
    plt.subplot(212)
    for envelope, colour, name in ((e_low, "blue", "row"),
                                   (e_high, "red", "high")):
        plt.plot(frmTime, envelope, color=colour, label=name)
    plt.title('Energy of Envelopes')
    plt.autoscale(tight=True)

    plt.tight_layout()
    plt.show()

    return envs
Example #25
0
def mainlobeTracker(inputFile='../../sounds/sines-440-602-hRange.wav'):
    """
    Track two sinusoids with the sinusoidal model and compare with the true tracks.

    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    Side effects:
           Prints the mean estimation error and shows a plot of both sets of
           tracks over the spectrogram.
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'  # Window type
    t = -80  # threshold (negative dB)

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047  # Window size
    N = 4096  # FFT Size
    H = 128  # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001
    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t,
                                                       maxnSines, minSineDur,
                                                       freqDevOffset,
                                                       freqDevSlope)
    fTrackTrue = genTrueFreqTracks(
        tStamps)  # Generate the true frequency tracks
    tailF = 20
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] -
                             fTrackEst[tailF:-tailF, :]),
                      axis=0)
    # Single-argument call form: valid both as a Python 3 function call and as
    # a Python 2 print statement with a parenthesised expression.
    print("Mean estimation error = " + str(meanErr) + ' Hz')
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # Slice bounds must be integers; a float bound raises TypeError on current NumPy.
    lastPlotBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps,
                   binFreq,
                   np.transpose(mX[:, :lastPlotBin]),
                   cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned
Example #26
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Return (and plot) the energy envelopes of a sound in a low and a high band.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    Side effect:
            Shows a two-panel matplotlib figure (spectrogram and envelopes).
    """
    fs, x = UF.wavread(inputFile)

    # Exclusive upper bin of each band (first bin at or above the band edge).
    low_stop, high_stop = [
        int(np.ceil(float(edge_hz) * N / fs)) for edge_hz in (3000, 10000)
    ]
    w = get_window(window, M)

    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # Per band: undo the dB scale, sum squared magnitudes per frame, go back to dB.
    # The low band starts at bin 1, so the DC bin is not counted.
    band_db = []
    for first, stop in ((1, low_stop), (low_stop, high_stop)):
        linear = 10 ** (mX[:, first:stop] / 20)
        band_db.append(10 * np.log10(np.sum(linear ** 2, axis=1)))
    e_low, e_high = band_db

    envs = np.transpose(np.array(band_db))

    # draw graph
    plt.figure(1, figsize=(9.5, 6))

    plt.subplot(211)
    frame_count, bin_count = mX.shape[0], mX.shape[1]
    frmTime = H * np.arange(frame_count) / float(fs)
    binFreq = np.arange(bin_count) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
    plt.title('mX ({0}), M={1}, N={2}, H={3}'.format(inputFile, M, N, H))
    plt.autoscale(tight=True)

    plt.subplot(212)
    plt.plot(frmTime, e_low, color="blue", label="row")
    plt.plot(frmTime, e_high, color="red", label="high")
    plt.title('Energy of Envelopes')
    plt.autoscale(tight=True)

    plt.tight_layout()
    plt.show()

    return envs
Example #27
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Run a sinusoidal-plus-residual analysis/synthesis and write the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Writes three wav files under 'output_sounds/' (sinusoidal part, residual,
    and their sum) and returns the tuple (x, fs, mXr, tfreq, y).
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # spectrogram of the residual (only the magnitude is returned to the caller)
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # y: sinusoids plus residual, ys: sinusoids only
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # Output names reuse the input file name with its last four characters
    # (the extension, e.g. '.wav') removed.
    stem = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (ys, stem + '_sprModel_sines.wav'),
        (xr, stem + '_sprModel_residual.wav'),
        (y, stem + '_sprModel.wav'),
    )
    for sound, path in outputs:
        UF.wavwrite(sound, fs, path)

    return x, fs, mXr, tfreq, y
Example #28
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute, plot and save the energy envelopes of a sound in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    Side effects:
            Saves the figure to 'engEnv.png' and shows it.
    """
    def to_energy_db(band_db):
        # dB magnitudes -> linear -> squared and summed along each frame -> dB
        squared = (10 ** (band_db / 20)) ** 2
        return 10 * np.log10(np.sum(squared, axis=1))

    fs, x = UF.wavread(inputFile)

    # Exclusive upper bins of the two bands.
    split_bin = int(np.ceil(float(3000) * N / fs))
    top_bin = int(np.ceil(float(10000) * N / fs))
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)

    # Bin 0 is skipped in the low band; the high band starts where the low ends.
    eDB_low = to_energy_db(mX[:, 1:split_bin])
    eDB_high = to_energy_db(mX[:, split_bin:top_bin])

    engEnv = np.transpose(np.array([eDB_low, eDB_high]))

    # Spectrogram on top.
    plt.subplot(211)
    frmTime = H * np.arange(mX.shape[0]) / float(fs)
    binFreq = np.arange(N / 2 + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
    plt.title('Spectrogram')
    plt.ylabel('frequency (Hz)')
    plt.autoscale(tight=True)

    # Envelopes below.
    plt.subplot(212)
    for curve, colour, name in ((eDB_low, "blue", "low"),
                                (eDB_high, "green", "high")):
        plt.plot(frmTime, curve, color=colour, label=name)
    plt.title('Energy Envelopes')
    plt.ylabel('Energy (dB)')
    plt.autoscale(tight=True)

    plt.tight_layout()
    plt.savefig('engEnv.png')
    plt.show()

    return engEnv
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
            A band that contains no bins yields -inf for every frame.
    """
    # read the input sound
    fs, signal = UF.wavread(inputFile)
    # compute window
    w = get_window(window, M)
    # compute the spectrum (magnitudes in dB, one row per frame)
    magnitude_frames, _ = stft.stftAnal(signal, fs, w, N, H)

    # fractional bin positions of the band edges
    k_3000 = 3000 * float(N) / fs
    k_10000 = 10000 * float(N) / fs

    # Select the bins of each band with boolean masks instead of testing every
    # bin of every frame in Python. Bin 0 is excluded and both edges are strict.
    bins = np.arange(magnitude_frames.shape[1])
    in_low_band = (bins >= 1) & (bins < k_3000)
    in_high_band = (bins > k_3000) & (bins < k_10000)

    # dB magnitude -> linear magnitude -> squared magnitude
    power = (10 ** (magnitude_frames / 20)) ** 2

    # sum over the bins of each band and convert the energy to dB
    energy_low = 10 * np.log10(np.sum(power[:, in_low_band], axis=1))
    energy_high = 10 * np.log10(np.sum(power[:, in_high_band], axis=1))

    # one row per frame: [low, high]
    return np.transpose(np.array([energy_low, energy_high]))
Example #30
0
def computeAndPlotF0(inputFile = '../../sounds/piano.wav'):
    """
    Estimate the fundamental frequency (f0) of a sound and plot it.

    The f0 contour returned by f0Detection is drawn on top of the magnitude
    spectrogram, limited to frequencies up to 500 Hz.

    Input:
        inputFile (string): wav file including the path
    """
    # analysis settings
    window, M, N, H = 'hamming', 2048, 2048, 256
    f0et, t = 5.0, -80
    minf0, maxf0 = 100, 300

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    f0 = f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # plot settings
    maxplotfreq = 500.0   # highest frequency shown
    fontSize = 16
    plot = 1              # 1: show the figure, 2: save it to disk

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Spectrogram with the same parameters as the f0 analysis, cut at
    # maxplotfreq and transposed so that rows are frequency bins.
    spectrogram, pX = stft.stftAnal(x, fs, w, N, H)
    top_bin = int(N*(maxplotfreq/fs))+1
    spectrogram = np.transpose(spectrogram[:, :top_bin])

    bin_count, frame_count = spectrogram.shape[0], spectrogram.shape[1]
    timeStamps = np.arange(frame_count)*H/float(fs)
    binFreqs = np.arange(bin_count)*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, spectrogram)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    # aspect ratio derived from the current axis limits
    x_lo, x_hi = ax.get_xlim()[0], ax.get_xlim()[1]
    y_lo, y_hi = ax.get_ylim()[0], ax.get_ylim()[1]
    ax.set_aspect((x_hi-x_lo)/(2.0*(y_hi-y_lo)))

    if plot == 1:
        plt.autoscale(tight=True)
        plt.show()
    elif plot == 2:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')
Example #31
0
def computeAndPlotF0(inputFile = '../sms-tools/sounds/piano.wav'):
    """
    Plot the estimated fundamental frequency (f0) of a sound over its spectrogram.

    f0 is obtained from f0Detection; the spectrogram is shown up to 500 Hz.

    Input:
        inputFile (string): wav file including the path
    """
    # analysis settings
    window = 'hamming'
    M = N = 2048
    H = 256
    f0et = 5.0
    t = -80
    minf0 = 100
    maxf0 = 300

    fs, x = UF.wavread(inputFile)          # samples and sampling rate
    w = get_window(window, M)              # analysis window
    f0 = f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # plot settings
    maxplotfreq = 500.0    # upper limit of the frequency axis
    fontSize = 16
    plot = 1               # 1 shows the figure, 2 writes it to a png file

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # STFT with the parameters used for the f0 analysis; keep only the bins up
    # to maxplotfreq and put frequency on the first axis.
    mag, pX = stft.stftAnal(x, w, N, H)
    shown = np.transpose(mag[:, :int(N*(maxplotfreq/fs))+1])

    timeStamps = np.arange(shown.shape[1])*H/float(fs)
    binFreqs = np.arange(shown.shape[0])*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, shown)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    # set the aspect ratio from the spans of the current axis limits
    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    x_span = xLim[1]-xLim[0]
    y_span = yLim[1]-yLim[0]
    ax.set_aspect(x_span/(2.0*y_span))

    if plot == 1:
        plt.autoscale(tight=True)
        plt.show()
    elif plot == 2:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')
Example #32
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the energy envelope of a sound in a low and a high frequency band.

    Inputs:
            inputFile (string): input sound file
            window (string): analysis window type
            M (integer): analysis window size
            N (integer): FFT size
            H (integer): hop size for the stft computation
    Output:
            numpy array with one row per frame and two columns: the result of
            energy_computation (defined elsewhere in this file) for the band
            0 < f < 3000 Hz and for the band 3000 < f < 10000 Hz.
    Raises:
            ValueError if either band contains no bin.
    """
    w = get_window(window, M)

    (fs, x) = UF.wavread(inputFile)

    mX, pX = stft.stftAnal(x, w, N, H)

    # Frequency in Hz of the first N/2 - 1 bins.
    size = int(N / 2) - 1
    freq = np.arange(size) * float(fs) / N

    # Bin indices of each band. Plain index arrays are used rather than
    # zip(*np.where(...)): on Python 3 zip returns a one-shot iterator, so
    # calling min() and then max() on it fails with "max() arg is an empty
    # sequence".
    low_bins = np.where((freq > 0) & (freq < 3000))[0]
    high_bins = np.where((freq > 3000) & (freq < 10000))[0]

    UB_low = int(low_bins.max())
    LB_high = int(high_bins.min())
    UB_high = int(high_bins.max())

    # Number of frames: total number of values over bins per frame (N/2 + 1).
    num_frames = int(mX.size / (int(N / 2) + 1))

    # The low band ends at its own last bin (UB_low), not at LB_high - 1, so a
    # bin lying exactly on 3000 Hz is left out of both bands instead of
    # producing a shape mismatch.
    low = np.array(mX[:num_frames, 1:UB_low + 1], dtype=float)
    high = np.array(mX[:num_frames, LB_high:UB_high + 1], dtype=float)

    # Compute energy (energy conversions using log and sum ** 2)
    low_energy = energy_computation(low)
    high_energy = energy_computation(high)

    # Change to right structure: one row per frame, columns [low, high]
    engEnvs = np.append([low_energy], [high_energy], axis=0)
    engEnvs = np.transpose(engEnvs)

    return engEnvs
Example #33
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    # read file, get fs and signal
    fs, x = UF.wavread(inputFile)

    # get window
    w = get_window(window, M)

    # stft (magnitudes in dB, one row per frame)
    mX, _ = stft.stftAnal(x, w, N, H)

    # convert to linear scale and square
    power = np.square(10 ** (mX / 20))

    # Fractional bin positions of the band edges. float() keeps the division
    # exact on Python 2, where int / int would truncate.
    edge_3000 = 3000 * float(N) / fs
    edge_10000 = 10000 * float(N) / fs

    # Slice ends are exclusive, so they must be the first bin at or above the
    # edge (ceil). Using the floor here would drop the last bin that is still
    # below the edge. The high band starts at the first bin strictly above
    # 3000 Hz.
    low_start = 1  # skip the DC bin
    low_stop = int(np.ceil(edge_3000))
    high_start = int(np.floor(edge_3000)) + 1
    high_stop = int(np.ceil(edge_10000))

    engEnv = np.zeros((mX.shape[0], 2))
    # energy envelope in (0, 3000)
    engEnv[:, 0] = np.sum(power[:, low_start:low_stop], axis=1)
    # energy envelope in (3000, 10000)
    engEnv[:, 1] = np.sum(power[:, high_start:high_stop], axis=1)

    # convert to db
    return 10 * np.log10(engEnv)
Example #34
0
def find_chirp_end_ms_odf(input_path):
    """
    Locate the frame with the most energy in a narrow band around 1 kHz.

    The STFT of the sound is computed with the module-level settings
    `window`, `M`, `N` and `H` (defined outside this function). For each frame
    the energy of the bins with 1000 Hz < f < 1002 Hz is computed in dB; the
    position of the frame with the highest energy is mapped to a time.

    Input:
            input_path (string): wav file including the path
    Output:
            Time of the highest-energy frame, in milliseconds.
            NOTE(review): if the bin spacing fs/N exceeds 2 Hz the band can be
            empty, in which case every frame has -inf energy and 0 is returned.
    """
    def dB2energydB(mdB):
        # dB magnitude -> linear magnitude -> summed squared magnitude -> dB
        m = 10 ** (mdB / 20.)
        return 10 * np.log10(np.sum(m ** 2.))

    (fs, x) = UF.wavread(input_path)
    w = get_window(window, M)
    xmX, xpX = stft.stftAnal(x, w, N, H)
    numFrames = int(xmX[:, 0].size)  # number of frames (time slices)
    binFreq = np.arange(N / 2 + 1) * float(fs) / N  # bin frequencies (positive side only)

    # First bin above 1000 Hz and last bin below 1002 Hz.
    bandFirstBin = np.where(binFreq > 1000)[0][0]
    bandLastBin = np.where(binFreq < 1002)[0][-1]

    # energy of the band for every frame
    engEnv = np.zeros([numFrames])
    for idx_frame in range(numFrames):
        engEnv[idx_frame] = dB2energydB(
            xmX[idx_frame, bandFirstBin:bandLastBin + 1])

    # Relative position of the loudest frame, scaled by the sound duration.
    maxIndex = np.argmax(engEnv)
    timePercent = maxIndex * 1.0 / engEnv.size
    # float() so that the duration in seconds is not truncated to a whole
    # number by Python 2 integer division.
    audioLength = x.size / float(fs)
    end_of_chirp = audioLength * timePercent
    return end_of_chirp * 1000
Example #35
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
            Each band excludes its lower boundary bin and includes the last bin
            at or below its upper edge.
    """
    # get wav file and sampling rate from input file
    (fs, x) = UF.wavread(inputFile)

    # get window for STFT (third argument False: fftbins disabled)
    w = get_window(window, M, False)

    # compute magnitude spectra (in dB)
    xmX, _ = stft.stftAnal(x, w, N, H)

    # convert the magnitude spectra from dB to linear scale and square them
    power = (10 ** (xmX / 20)) ** 2

    # Last bin at or below each band edge. The explicit floor and int() keep
    # the bounds integral on Python 3, where `/` would give floats that cannot
    # be used as slice indices.
    lowFreq_threshold = 3000
    highFreq_threshold = 10000
    lowFreq_bin = int(lowFreq_threshold * N // fs)
    highFreq_bin = int(highFreq_threshold * N // fs)

    # Sum the squared magnitudes of each band for all frames at once and
    # convert to dB.
    engEnvLow = 10 * np.log10(
        np.sum(power[:, 1:lowFreq_bin + 1], axis=1))
    engEnvHigh = 10 * np.log10(
        np.sum(power[:, lowFreq_bin + 1:highFreq_bin + 1], axis=1))

    engEnv = np.array([engEnvLow, engEnvHigh])

    return np.transpose(engEnv)
    
    
Example #36
0
def mainlobeTracker(inputFile = '../sms-tools/sounds/sines-440-602-hRange.wav'):
    """
    Track two sinusoids with the sinusoidal model and compare with the true tracks.

    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    Side effects:
           Prints the mean estimation error and draws the tracks over the
           spectrogram on the current matplotlib figure (the figure is not shown
           by this function).
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'                             # Window type
    t = -67                                         # threshold (negative dB)
    # window = blackman && t >= -67: Mean estimation error = [ 0.01060268  1.58192485] Hz
    # window = blackman harris && t >= -61: Mean estimation error = [ 0.01060268  1.58192485] Hz
    # others failed

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047                                        # Window size
    N = 4096                                        # FFT Size
    H = 128                                         # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001
    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks
    tailF = 20
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(
        np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]),
        axis=0)
    print("Mean estimation error = " + str(meanErr) + ' Hz')      # Print the error to terminal
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # Built-in int() replaces the np.int alias, which no longer exists in
    # current NumPy releases.
    lastPlotBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastPlotBin]),
                   cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color = 'c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color = 'y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('frequency detection: Window = ' + window + '& t = ' + str(t))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned
Example #37
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an onset detection function (ODF) in two frequency bands.

    The ODF is the frame-to-frame increase of the band energy returned by
    by_frame_energy (defined elsewhere in this file); decreases are set to 0.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            numpy array with one row per frame and two columns; the first row is
            always 0 because it has no previous frame.
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    (mX, pX) = stft.stftAnal(x, fs, w, N, H)

    numFrames = int(mX[:, 0].size)

    cutoff1 = 3000
    cutoff2 = 10000

    # First bin at or above each cutoff. np.ceil returns a float, so the value
    # is converted to int before it is used as a slice bound.
    cutoff_bucket1 = int(np.ceil(float(cutoff1) * N / fs))
    cutoff_bucket2 = int(np.ceil(float(cutoff2) * N / fs))

    # Bin 0 is not part of the low band.
    low_band = mX[:, 1:cutoff_bucket1]
    high_band = mX[:, cutoff_bucket1:cutoff_bucket2]

    # per-frame energy of each band
    E = np.zeros((numFrames, 2))
    E[:, 0] = by_frame_energy(low_band)
    E[:, 1] = by_frame_energy(high_band)

    # difference between each frame and the previous one
    O = np.zeros((numFrames, 2))
    O[1:, :] = E[1:, :] - E[:-1, :]

    # half wave rectification
    O[O <= 0] = 0

    return O
Example #38
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = Number of frames, holding the
            energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    num_frames, num_bins = np.shape(mX)

    # dB magnitudes -> linear magnitudes
    mXLine = np.power(10, mX / 20.0)

    # Frequency of every bin actually present in mX. Building the axis from
    # the column count (instead of all N FFT bins) guarantees that the band
    # masks never address a column that does not exist, e.g. when half the
    # sampling rate is below 10000 Hz.
    bin_freqs = np.arange(num_bins) * fs / float(N)

    # boolean masks of the two bands; all edges are strict
    band_low = (bin_freqs > 0) & (bin_freqs < 3000)
    band_high = (bin_freqs > 3000) & (bin_freqs < 10000)

    # energy envelope: summed squared magnitude per frame, in dB
    engEnv = np.zeros((num_frames, 2))
    engEnv[:, 0] = 10 * np.log10(np.sum(mXLine[:, band_low] ** 2, axis=1))
    engEnv[:, 1] = 10 * np.log10(np.sum(mXLine[:, band_high] ** 2, axis=1))

    return engEnv
Example #39
0
    def create_plot(self):
        """
        Render three plots of self.audio into the 'plots/' directory.

        Writes the time-domain waveform, the magnitude spectrogram and a line
        plot of the magnitude spectra. As side effects, self.audio is replaced
        by its float32 version divided by norm_fact[<original dtype name>]
        (norm_fact is defined outside this method), and the magnitude spectra
        are stored in the module-level variable `mx`.
        """
        global mx

        # ---- time-domain waveform ----
        rate = float(self.rates)
        time_axis = np.arange(self.audio.size) / rate
        # width of the spectrogram figure: half the time of the last sample
        fig_width = time_axis[-1] * 0.5
        plt.plot(np.arange(self.audio.size) / rate, self.audio)
        axis_limits = [
            0,
            self.audio.size / rate,
            min(self.audio),
            max(self.audio),
        ]
        plt.axis(axis_limits)
        plt.savefig("plots/time-domain.png")
        plt.close()

        # ---- STFT settings ----
        N = 8192           # FFT size
        M = 8192           # analysis window size
        H = int(0.75 * M)  # value passed to stftAnal as its last argument
        w = get_window("hamming", M)

        # normalise the samples; the divisor is looked up by the dtype the
        # audio had before the conversion
        self.audio = np.float32(self.audio) / norm_fact[self.audio.dtype.name]

        maxplotfreq = self.rates / 8.82
        mX, pX = stft.stftAnal(self.audio, self.rates, w, N, H)
        frame_count = int(mX[:, 0].size)
        frmTime = H * np.arange(frame_count) / float(self.rates)
        binFreq = self.rates * np.arange(N * maxplotfreq / self.rates) / N

        # both remaining figures are drawn without axes and without margins
        no_margins = dict(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)

        # ---- magnitude spectrogram ----
        plt.figure(figsize=(fig_width, 1))
        shown_bins = int(N * maxplotfreq / self.rates + 1)
        plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :shown_bins]))
        plt.axis("off")
        plt.subplots_adjust(**no_margins)
        plt.savefig("plots/magnitude spectogram.png", dpi=300)
        plt.close()

        # ---- magnitude spectra as lines ----
        plt.plot(mX)
        plt.axis("off")
        plt.subplots_adjust(**no_margins)
        plt.savefig("plots/magnitude plot.png")
        plt.close()

        mx = mX
Example #40
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames, containing
            the energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    low_cut = 3000.0
    high_cut = 10000.0

    w = get_window(window, M)
    fs, x = UF.wavread(inputFile)
    mX, _ = stft.stftAnal(x, fs, w, N, H)

    # Bin k sits at k * fs / N Hz.  Slice bounds must be integers, and the
    # strict inequalities of the band definitions translate to:
    #   low band : bins 1 .. ceil(3000 N / fs) - 1   (DC excluded)
    #   high band: bins floor(3000 N / fs) + 1 .. ceil(10000 N / fs) - 1
    low_stop = int(np.ceil(low_cut * N / fs))
    high_start = int(np.floor(low_cut * N / fs)) + 1
    high_stop = int(np.ceil(high_cut * N / fs))

    # The analysis magnitudes are treated as dB: go back to linear power.
    power = (10 ** (mX / 20.0)) ** 2

    low_db = 10 * np.log10(np.sum(power[:, 1:low_stop], axis=1))
    high_db = 10 * np.log10(np.sum(power[:, high_start:high_stop], axis=1))

    # column_stack always yields a (K, 2) array, including for K == 1.
    return np.column_stack((low_db, high_db))
    
Example #41
0
def chirpTracker(inputFile='../sms-tools/sounds/chirp-150-190-linear.wav'):
    """
    Estimate the two frequency tracks of a chirp sound with the sinusoidal
    model, print the mean estimation error and plot estimate vs. ground truth.

    Input:
           inputFile (string) = wav file including the path
    Output:
           M (int) = Window length
           H (int) = hop size in samples
           tStamps (numpy array) = time stamps at which the frequency components were estimated
           fTrackEst (numpy array) = estimated frequency values, one row per time frame, one column per component
           fTrackTrue (numpy array) = true frequency values, one row per time frame, one column per component
    """
    # ---- analysis parameters ----
    M = 3298                             # window size in samples
    H = 128                              # hop size in samples
    N = int(2 ** np.ceil(np.log2(M)))    # FFT size: smallest power of two >= M
    window = 'blackman'                  # window type
    t = -80.0                            # peak detection threshold
    maxnSines = 2                        # maximum number of sinusoids per frame
    minSineDur = 0.0                     # zero, so no duration-based track pruning
    freqDevOffset = 30                   # minimum frequency deviation at 0 Hz
    freqDevSlope = 0.001                 # slope increase of minimum frequency deviation

    # ---- analysis ----
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    tStamps = genTimeStamps(x.size, M, fs, H)
    fTrackEst, _, _ = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                       freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)

    # ---- error, ignoring 20 frames at each end ----
    edge = 20
    core = slice(edge, -edge)
    deviation = np.abs(fTrackTrue[core, :] - fTrackEst[core, :])
    meanErr = np.mean(deviation, axis=0)
    print("Mean estimation error = " + str(meanErr) + ' Hz')

    # ---- plot spectrogram with true and estimated tracks ----
    mX, _ = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    shownBins = N * maxplotfreq / fs
    binFreq = fs * np.arange(shownBins) / N
    spectrogram = np.transpose(mX[:, :int(shownBins + 1)])
    plt.pcolormesh(tStamps, binFreq, spectrogram, cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('True and estimated frequency, windowsize = ' + str(M))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()

    return M, H, tStamps, fTrackEst, fTrackTrue
Example #42
0
def spec_calc(audio_inp, params):
    """
    Compute the STFT of an audio signal and rescale its magnitude spectrum.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Analysis parameters. Only the following keys are read:
            - W : Window size (length of the Hann analysis window)
            - N : FFT size
            - H : Hop size

    Returns
    -------
    xmX : np.array
        Magnitude output of ``stftAnal`` divided by 20
    xpX : np.array
        Phase output of ``stftAnal``, unchanged
    """
    w = windows.hann(params['W'])

    # Compute the STFT
    xmX, xpX = stftAnal(x=audio_inp, w=w, N=params['N'], H=params['H'])

    # NOTE(review): presumably stftAnal returns 20*log10(|X|); dividing by 20
    # would then leave log10(|X|) -- confirm against the stftAnal in use.
    return xmX / 20, xpX
Example #43
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an energy-based onset detection function (ODF) in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            numpy array with two columns and one row per pair of consecutive
            frames (K - 1 rows for K frames): the half-wave rectified
            frame-to-frame difference of the band energy in dB.
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """
    fs, s = wavread(inputFile)
    w = get_window(window, M)
    mX, _ = stft.stftAnal(s, w, N, H)

    # NOTE(review): stftAnal is taken to return magnitudes in dB, so they are
    # converted to linear power before summing -- confirm.
    power = (10 ** (np.asarray(mX) / 20.0)) ** 2

    # Bin k sits at k * fs / N Hz; the bounds implement the strict
    # inequalities of the band definitions (DC and the 3000 Hz bin excluded).
    low_stop = int(np.ceil(3000.0 * N / fs))
    high_start = int(np.floor(3000.0 * N / fs)) + 1
    high_stop = int(np.ceil(10000.0 * N / fs))

    # One energy value per frame (row of the spectrogram) and band.
    band_energy = np.column_stack((
        np.sum(power[:, 1:low_stop], axis=1),
        np.sum(power[:, high_start:high_stop], axis=1),
    ))
    band_db = 10 * np.log10(band_energy)

    # Only energy increases signal an onset: clip every decrease to zero.
    odf = np.diff(band_db, axis=0)
    odf[odf < 0] = 0
    return odf
Example #44
0
def mainlobeTracker(inputFile="../../sounds/sines-440-602-hRange.wav"):
    """
    Track two sinusoids with the sinusoidal model, print the mean estimation
    error and draw the estimate against the true tracks on the spectrogram.

    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = time stamps at which the frequency components were estimated
           fTrackEst = estimated frequency values, one row per time frame, one column per component
           fTrackTrue = true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters
    window = "blackmanharris"  # Window type
    t = -93.0  # threshold (negative dB)

    M = 2047  # Window size
    N = 4096  # FFT Size
    H = 128  # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001

    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )
    fTrackTrue = genTrueFreqTracks(tStamps)  # Generate the true frequency tracks

    # Mean estimation error; tailF frames at the beginning and at the end are
    # left out of the average.
    tailF = 10
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    # Single-argument call form: valid on both Python 2 and Python 3.
    print("Mean estimation error = " + str(meanErr) + " Hz")

    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    # Slice bounds must be integers.
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, :lastBin]), cmap="hot_r")
    plt.plot(tStamps, fTrackTrue, "o-", color="c", linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color="y", linewidth=2.0)
    plt.legend(("True f1", "True f2", "Estimated f1", "Estimated f2"))
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")
    plt.autoscale(tight=True)
    return window, t, tStamps, fTrackEst, fTrackTrue  # Output returned
Example #45
0
def main():
    """Analyze a fixed flute recording with the STFT and draw its spectrogram.

    Returns:
        dict: the function's local variables as produced by ``locals()``
        (``inputFile``, ``window``, ``M``, ``N``, ``H``, ``fs``, ``x``, ``w``,
        ``mX``, ``pX``).  Because of this, the local names below are part of
        the returned value and must not be renamed.
    """
    # Fixed analysis configuration.
    inputFile = "../../sounds/flute-A4.wav"
    window = "hamming"
    M = 801  # analysis window length passed to get_window
    N = 1024  # passed to stftAnal as N (presumably the FFT size)
    H = 400  # passed to stftAnal as H (presumably the hop size)

    fs, x = UF.wavread(inputFile)

    w = get_window(window, M)

    mX, pX = STFT.stftAnal(x, w, N, H)

    # Draw the transposed magnitude output; the figure is not shown or saved here.
    plt.pcolormesh(np.transpose(mX))

    return locals()
Example #46
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an energy-based onset detection function (ODF) in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            numpy array with one row per frame and two columns holding the
            half-wave rectified frame-to-frame difference of the band energy
            in dB; the first row is zero.
            ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
            ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    mX, pX = stft.stftAnal(x, w, N, H)
    # Treat the analysis magnitudes as dB: back to linear power.
    power = np.power(10.0, mX / 20.0) ** 2

    # Centre frequency of every bin, from the sampling rate of the file
    # actually read (float division, so no bin is mis-rounded onto a band edge).
    bin_freqs = np.arange(power.shape[1]) * float(fs) / N
    low_band = (bin_freqs > 0) & (bin_freqs < 3000)
    high_band = (bin_freqs > 3000) & (bin_freqs < 10000)

    band_energy = 10.0 * np.log10(np.column_stack((
        np.sum(power[:, low_band], axis=1),
        np.sum(power[:, high_band], axis=1),
    )))

    # ODF[0] stays 0; later rows keep only energy increases.
    odf = np.zeros_like(band_energy)
    odf[1:] = np.maximum(np.diff(band_energy, axis=0), 0)
    return odf
Example #47
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Run the sinusoidal plus residual analysis/synthesis and write the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Returns the tuple (x, fs, mXr, tfreq, y): input samples, sampling rate,
    magnitude spectrogram of the residual, sinusoidal track frequencies and
    the full synthesized sound.
    """
    Ns = 512  # FFT size used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: sinusoidal tracks plus time-domain residual
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # synthesis: y is the sum, ys the sinusoidal part only
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # write sinusoidal part, residual and sum next to each other, named after
    # the input file with its last four characters (the extension) removed
    prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (ys, '_sprModel_sines.wav'),
        (xr, '_sprModel_residual.wav'),
        (y, '_sprModel.wav'),
    )
    for samples, suffix in outputs:
        UF.wavwrite(samples, fs, prefix + suffix)

    return x, fs, mXr, tfreq, y
Example #48
0
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Returns the tuple (x, fs, mXr, hfreq, y): input samples, sampling rate,
    magnitude spectrogram of the residual, harmonic track frequencies and
    the full synthesized sound.
    """
    Ns = 512  # FFT size used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: harmonic tracks plus time-domain residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(
        x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

    # spectrogram of the residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # synthesis: y is the sum, yh the harmonic part only
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # write harmonic part, residual and sum next to each other, named after
    # the input file with its last four characters (the extension) removed
    prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (yh, '_hprModel_sines.wav'),
        (xr, '_hprModel_residual.wav'),
        (y, '_hprModel.wav'),
    )
    for samples, suffix in outputs:
        UF.wavwrite(samples, fs, prefix + suffix)

    return x, fs, mXr, hfreq, y
Example #49
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    xmX, xpX = stft.stftAnal(x, fs, w, N, H)
    # Build a real list of per-frame results: on Python 3 ``map`` is lazy and
    # np.array(map(...)) would wrap the iterator in a 0-d object array.
    # NOTE(review): frameEnergies presumably returns the two band energies of
    # one frame -- confirm against its definition.
    return np.array([frameEnergies(frame, fs, N) for frame in xmX])
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames returned
            by the STFT analysis, containing the energy envelope in dB
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    w = get_window(window, M)         # get the window
    (fs, x) = UF.wavread(inputFile)
    lowfreq = 3000.0
    highfreq = 10000.0

    # First bin at or above each cut-off frequency (bin k sits at k*fs/N Hz).
    k1 = int(math.ceil(lowfreq / (float(fs) / N)))
    k2 = int(math.ceil(highfreq / (float(fs) / N)))

    xmX, xpX = stft.stftAnal(x, fs, w, N, H)

    # Treat the analysis magnitudes as dB: back to linear power, all frames
    # at once.
    power = np.square(np.power(10, xmX / 20.0))

    # The number of rows comes from the analysis output itself instead of an
    # estimate based on the signal length.
    engEnv = np.zeros((xmX.shape[0], 2))
    engEnv[:, 0] = 10 * np.log10(np.sum(power[:, 1:k1], axis=1))    # bins 1 .. k1-1
    engEnv[:, 1] = 10 * np.log10(np.sum(power[:, k1:k2], axis=1))   # bins k1 .. k2-1
    return engEnv
Example #51
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    (mX, pX) = stft.stftAnal(x, fs, w, N, H)

    cutoff1 = 3000
    cutoff2 = 10000

    # np.ceil returns a float; slice bounds must be integers.
    cutoff_bucket1 = int(np.ceil(float(cutoff1) * N / fs))
    cutoff_bucket2 = int(np.ceil(float(cutoff2) * N / fs))

    low_band = mX[:, 1:cutoff_bucket1]                 # bin 0 (DC) left out
    high_band = mX[:, cutoff_bucket1:cutoff_bucket2]

    # NOTE(review): by_frame_energy is defined elsewhere; presumably it maps
    # a (frames x bins) slice to one dB energy value per frame -- confirm.
    E = np.zeros((mX.shape[0], 2))
    E[:, 0] = by_frame_energy(low_band)
    E[:, 1] = by_frame_energy(high_band)

    return E
Example #52
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a numpy array engEnv with shape Kx2, K = Number of frames
            containing energy envelop of the signal in decibles (dB) scale
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    xmX, xpX = stft.stftAnal(x, fs, w, N, H)

    # Slice bounds (end exclusive).  Floor division keeps them integral on
    # Python 3 as well; int() covers a non-integer sampling-rate type.
    low_start = 1                              # skip the DC bin
    low_stop = int((3000 * N) // fs) + 1
    high_stop = int((10000 * N) // fs) + 1

    def band_db(first, stop):
        """Return the dB energy of bins first..stop-1 for every frame."""
        linear = np.power(10, xmX[:, first:stop] / 20.0)
        # Floor tiny magnitudes at eps before squaring and taking the log.
        linear[linear < eps] = eps
        return 10 * np.log10(np.sum(linear * linear, axis=1))

    low_band = band_db(low_start, low_stop)
    high_band = band_db(low_stop, high_stop)   # starts where the low band ends

    return np.column_stack((low_band, high_band))
Example #53
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an energy-based onset detection function (ODF) in two bands.

    Inputs:
        inputFile (string): input sound file (monophonic with sampling rate of 44100)
        window (string): analysis window type (choice of rectangular, triangular,
            hanning, hamming, blackman, blackmanharris)
        M (integer): analysis window size (odd integer value)
        N (integer): fft size (power of two, such that N > M)
        H (integer): hop size for the STFT computation
    Output:
        numpy array with one row per frame and two columns holding the
        half-wave rectified frame-to-frame difference of the band energy in
        dB; the first row is zero.
        ODF[:,0]: ODF computed in band 0 < f < 3000 Hz
        ODF[:,1]: ODF computed in band 3000 < f < 10000 Hz
    Raises:
        ValueError: if N is smaller than M or N is not a power of two.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    # Plain comparison: "N < M is True" chains into "N < M and M is True",
    # which never holds for an integer M.
    if N < M:
        raise ValueError("'N' should be greather than 'M'")
    if np.log2(N) % 1 != 0:
        raise ValueError("Input not power of 2")

    Xm, Xp = stft.stftAnal(x, w, N, H)
    # Treat the analysis magnitudes as dB: back to linear power.
    power = (10 ** (Xm / 20)) ** 2

    # Bin k of an N-point FFT sits at k * fs / N Hz; the masks implement the
    # strict inequalities of the band definitions (DC excluded).
    bin_freqs = np.arange(power.shape[1]) * float(fs) / N
    low_band = (bin_freqs > 0) & (bin_freqs < 3000)
    high_band = (bin_freqs > 3000) & (bin_freqs < 10000)

    engEnv = np.zeros((power.shape[0], 2))
    engEnv[:, 0] = 10 * np.log10(np.sum(power[:, low_band], axis=1))
    engEnv[:, 1] = 10 * np.log10(np.sum(power[:, high_band], axis=1))

    # ODF[0] stays 0; every later row is the energy increase with respect to
    # the previous frame, decreases being clipped to zero.
    ODF = np.zeros_like(engEnv)
    ODF[1:] = np.maximum(np.diff(engEnv, axis=0), 0)
    return ODF
Example #54
0
def computeODF(inputFile, window, M, N, H):
    """
    Compute an energy-based onset detection function (ODF) in two bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd integer value)
            N (integer): fft size (power of two, bigger or equal than than M)
            H (integer): hop size for the STFT computation
    Output:
            numpy array with two columns and one row per pair of consecutive
            frames: the frame-to-frame difference of the band energy in dB,
            with negative values set to zero.
            ODF[:,0]: ODF of the low band (bins from 0 up to the 3000 Hz bin)
            ODF[:,1]: ODF of the high band (bins from the 3000 Hz bin up to the 10000 Hz bin)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Bin edges of the two bands: [0, bin(3000 Hz)) and [bin(3000 Hz), bin(10000 Hz)).
    edges = (0, int(np.ceil(3000*N/fs)), int(np.ceil(10000*N/fs)))

    mX, pX = stft.stftAnal(x, fs, w, N, H)
    # dB -> linear amplitude
    amplitude = np.power(10, np.divide(mX, 20))

    # One dB energy curve per band, then arranged as columns.
    curves = []
    for first, stop in zip(edges[:-1], edges[1:]):
        squared = np.power(amplitude[:, first:stop], 2)
        curves.append(10*np.log10(np.sum(squared, axis=1)))
    engEnv = np.transpose(np.array(curves))

    # Difference between consecutive frames, keeping increases only.
    delta = engEnv[1:, :] - engEnv[0:-1]
    return np.where(delta < 0, 0, delta)
    
    """
Example #55
0
def display(engEnv, inputFile, window, M, N, H):
    """Show the spectrogram of a sound above its two band energy envelopes.

    engEnv is expected to hold one row per analysis frame with the low-band
    envelope in column 0 and the high-band envelope in column 1; the
    remaining arguments are the sound file and the STFT analysis settings
    used to compute the spectrogram.
    """
    fs, x = UF.wavread(inputFile)
    xmX, _ = stft.stftAnal(x, fs, get_window(window, M), N, H)

    # Axes of the spectrogram: frame times in seconds, bin frequencies in Hz.
    frmTime = H*np.arange(int(xmX[:, 0].size))/float(fs)
    binFreq = np.arange(N/2 + 1)*float(fs)/N

    plt.figure(1, figsize=(9.5, 6))

    # upper panel: spectrogram
    plt.subplot(211)
    plt.pcolormesh(frmTime, binFreq, xmX.T)
    plt.autoscale(tight=True)

    # lower panel: both envelopes over time
    plt.subplot(212)
    for column, style, name in ((0, 'b', 'Low'), (1, 'g', 'High')):
        plt.plot(frmTime, engEnv[:, column], style, label=name)
    plt.legend()

    plt.tight_layout()
    plt.show()
Example #56
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array with shape Kx2, K = number of frames, containing the
            energy envelope of the signal in decibels (dB)
            column 0: low band (bins from 0 up to the 3000 Hz bin)
            column 1: high band (bins from the 3000 Hz bin up to the 10000 Hz bin)
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Bin edges of the two bands: [0, bin(3000 Hz)) and [bin(3000 Hz), bin(10000 Hz)).
    edges = (0, int(np.ceil(3000*N/fs)), int(np.ceil(10000*N/fs)))

    mX, pX = stft.stftAnal(x, fs, w, N, H)
    # dB -> linear amplitude
    amplitude = np.power(10, np.divide(mX, 20))

    # One dB energy curve per band, then arranged as columns.
    curves = []
    for first, stop in zip(edges[:-1], edges[1:]):
        squared = np.power(amplitude[:, first:stop], 2)
        curves.append(10*np.log10(np.sum(squared, axis=1)))

    return np.transpose(np.array(curves))


    """
Example #57
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
      inputFile (string): input sound file (monophonic with sampling rate of 44100)
      window (string): analysis window type (choice of rectangular, triangular,
          hanning, hamming, blackman, blackmanharris)
      M (integer): analysis window size (odd positive integer)
      N (integer): FFT size (power of 2, such that N > M)
      H (integer): hop size for the stft computation
    Output:
      numpy array engEnv with shape Kx2, K = Number of frames, containing
      the energy envelope of the signal in decibels (dB)
      engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
      engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    Raises:
      ValueError: if N is smaller than M or N is not a power of two.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    # Plain comparison: "N < M is True" chains into "N < M and M is True",
    # which never holds for an integer M.
    if N < M:
        raise ValueError("'N' should be greather than 'M'")
    if np.log2(N) % 1 != 0:
        raise ValueError("Input not power of 2")

    Xm, Xp = stft.stftAnal(x, w, N, H)
    # Treat the analysis magnitudes as dB: back to linear power.
    power = (10 ** (Xm / 20)) ** 2

    # Bin k of an N-point FFT sits at k * fs / N Hz; the masks implement the
    # strict inequalities of the band definitions (DC excluded, and no bin
    # between the two bands is skipped).
    bin_freqs = np.arange(power.shape[1]) * float(fs) / N
    low_band = (bin_freqs > 0) & (bin_freqs < 3000)
    high_band = (bin_freqs > 3000) & (bin_freqs < 10000)

    engEnv = np.zeros((power.shape[0], 2))
    engEnv[:, 0] = 10 * np.log10(np.sum(power[:, low_band], axis=1))
    engEnv[:, 1] = 10 * np.log10(np.sum(power[:, high_band], axis=1))
    return engEnv
Example #58
0
def computeSNR(inputFile, window, M, N, H):
    """
    Measure the STFT analysis/synthesis error of a sound as two SNR values.

    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            python tuple (SNR1, SNR2): SNR1 is computed by ``snr`` over the
            whole signal, SNR2 over the signal with M samples removed at
            each end.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis followed by resynthesis
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    y = stft.stftSynth(mX, pX, M, H)

    # reconstruction error, with the output cut to the input length
    noise = x - y[:x.size]

    whole = snr(x, noise)
    inner = slice(M, -M)  # drops M samples at the start and at the end
    trimmed = snr(x[inner], noise[inner])
    return (whole, trimmed)
Example #59
0
def computeEngEnv(inputFile, window, M, N, H):
    """
    Compute the per-frame energy envelope of a sound in two frequency bands.

    Inputs:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning,
                hamming, blackman, blackmanharris)
            M (integer): analysis window size (odd positive integer)
            N (integer): FFT size (power of 2, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            numpy array engEnv with shape Kx2, K = number of frames, containing
            the energy envelope of the signal in decibels (dB)
            engEnv[:,0]: Energy envelope in band 0 < f < 3000 Hz (in dB)
            engEnv[:,1]: Energy envelope in band 3000 < f < 10000 Hz (in dB)
    """
    def band_db(frame, first, stop):
        # dB magnitudes -> linear, squared and summed over bins first..stop-1
        return 10. * np.log10(np.sum((10.**(frame[first:stop]/20.))**2))

    w = get_window(window, M)
    fs, x = UF.wavread(inputFile)
    mX, pX = stft.stftAnal(x, w, N, H)

    # Low band runs from bin 1 (DC skipped) up to the 3000 Hz bin; the high
    # band continues from there up to the 10000 Hz bin.
    low_stop = int(np.ceil(3000.*N/fs))
    high_stop = int(np.ceil(10000.*N/fs))

    envelope = np.zeros((mX.shape[0], 2))
    for out_row, frame in zip(envelope, mX):
        out_row[0] = band_db(frame, 1, low_stop)
        out_row[1] = band_db(frame, low_stop, high_stop)
    return envelope