Exemple #1
0
def sineODF(file='../../../../../audioDSP_course/assignments/sms-tools/sounds/piano.wav'):
    fs, x = UF.wavread(file)

    # set params:
    M = 1024    # window size
    H = int(M/3)     # hop size
    t = -80.0   #treshold (dB??)
    window = 'blackman' # window type
    fftSize = int(pow(2, np.ceil(np.log2(M))))  # size of FFT
    N = fftSize
    maxnSines = 10      # maximum simultaneous sines
    minSineDur = 0.1    # minimal duration of sines
    freqDevOffset = 30  # min(??) frequency deviation at 0Hz
    freqDevSlope = 0.001    # slope increase of min freq dev.


    w = get_window(window, M)    # get analysis window
    tStamps = genTimeStamps(len(x), M, fs, H)    # generate timestamp return?
    fTrackEst, mTrackEst, pTreckEst = SM.sineModelAnal(x, fs, w, fftSize, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    fTrackTrue = genTrueFreqTracks(tStamps) # get true freq. tracks

    # plotting:
    mX, pX = stft.stftAnal(x, fs, w, fftSize, H)
    maxplotfreq = 1500.0
    binFreq = fs*np.arange(N*maxplotfreq/fs)/N
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:,:N*maxplotfreq/fs+1]),cmap = 'hot_r')
    # plt.plot(fTrackTrue, 'o-', color = 'c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color = 'y', linewidth=2.0)
    # plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return fTrackEst
Exemple #2
0
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):

	originalWav = Sndfile(inputAudio, 'r')
	x = originalWav.read_frames(originalWav.nframes)
	fs = originalWav.samplerate
	nChannel = originalWav.channels
	print fs
	if nChannel >1:
		x = x[0]


	w = np.hamming(801)
	N = 2048
	t = -90
	minSineDur = .005
	maxnSines = 150
	freqDevOffset = 20
	freqDevSlope = 0.02
	Ns = 512
	H = Ns/4
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	inputDur = float(len(tfreq)*H/fs)
	#timeScale = np.array([0.1,0.1, inputDur, inputDur*2])
	timeScale = np.array([0,0, .4,outputDuration])

	ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
	y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
	
	if writeOutput ==1:
		outputWav = Sndfile(outputAudio, 'w', originalWav.format, originalWav.channels, originalWav.samplerate)
		outputWav.write_frames(y)
		outputWav.close()
	else:
		return y, fs, nChannel
Exemple #3
0
def chirpTracker(inputFile='../../sounds/chirp-150-190-linear.wav'):
    """
    Input:
           inputFile (string) = wav file including the path
    Output:
           M (int) = Window length
           H (int) = hop size in samples
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst (numpy array) = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue (numpy array) = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
           K is the number of frames
    """
    # Analysis parameters: Modify values of the parameters marked XX
    M = 3300  # Window size in samples

    ### Go through the code below and understand it, do not modify anything ###
    H = 128  # Hop size in samples
    N = int(pow(2, np.ceil(np.log2(M))))  # FFT Size, power of 2 larger than M
    t = -80.0  # threshold
    window = 'blackman'  # Window type
    maxnSines = 2  # Maximum number of sinusoids at any time frame
    minSineDur = 0.0  # minimum duration set to zero to not do tracking
    freqDevOffset = 30  # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001  # slope increase of minimum frequency deviation

    fs, x = UF.wavread(inputFile)  # read input sound
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t,
                                                       maxnSines, minSineDur,
                                                       freqDevOffset,
                                                       freqDevSlope)
    fTrackTrue = genTrueFreqTracks(
        tStamps)  # Generate the true frequency tracks
    tailF = 20
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] -
                             fTrackEst[tailF:-tailF, :]),
                      axis=0)
    print "Mean estimation error = " + str(
        meanErr) + ' Hz'  # Print the error to terminal
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(tStamps,
                   binFreq,
                   np.transpose(mX[:, :N * maxplotfreq / fs + 1]),
                   cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return M, H, tStamps, fTrackEst, fTrackTrue  # Output returned
Exemple #4
0
def mainlobeTracker(inputFile='../../sounds/sines-440-602-hRange.wav'):
    """
    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'  # Window type
    t = -80  # threshold (negative dB)

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047  # Window size
    N = 4096  # FFT Size
    H = 128  # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001
    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t,
                                                       maxnSines, minSineDur,
                                                       freqDevOffset,
                                                       freqDevSlope)
    fTrackTrue = genTrueFreqTracks(
        tStamps)  # Generate the true frequency tracks
    tailF = 20
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] -
                             fTrackEst[tailF:-tailF, :]),
                      axis=0)
    print "Mean estimation error = " + str(
        meanErr) + ' Hz'  # Print the error to terminal
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(tStamps,
                   binFreq,
                   np.transpose(mX[:, :N * maxplotfreq / fs + 1]),
                   cmap='hot_r')
    plt.plot(tStamps, fTrackTrue, 'o-', color='c', linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color='y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned
Exemple #5
0
def mainlobeTracker(inputFile = '../sms-tools/sounds/sines-440-602-hRange.wav'):
    """
    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """       
    # Analysis parameters: Modify values of the parameters marked XX
    window = 'blackman'                             # Window type
    t = -67                                               # threshold (negative dB)
    # window = blackman && t >= -67: Mean estimation error = [ 0.01060268  1.58192485] Hz
    # window = blackman harris && t >= -61: Mean estimation error = [ 0.01060268  1.58192485] Hz
    # ohers failed

    ### Go through the code below and understand it, do not modify anything ###   
    M = 2047                                             # Window size 
    N = 4096                                             # FFT Size
    H = 128                                              # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001
    # read input sound
    fs, x = UF.wavread(inputFile)               
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks
    tailF = 20                                 
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF,:] - fTrackEst[tailF:-tailF,:]),axis=0)     
    print("Mean estimation error = " + str(meanErr) + ' Hz')      # Print the error to terminal
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:,:np.int(N * maxplotfreq / fs + 1)]), cmap='hot_r')
    plt.plot(tStamps,fTrackTrue, 'o-', color = 'c', linewidth=3.0)
    plt.plot(tStamps,fTrackEst, color = 'y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('frequency detection: Window = ' + window + '& t = ' + str(t))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return window, float(t), tStamps, fTrackEst, fTrackTrue  # Output returned 
Exemple #6
0
def chirpTracker(inputFile='../sms-tools/sounds/chirp-150-190-linear.wav'):
    """
    Input:
           inputFile (string) = wav file including the path
    Output:
           M (int) = Window length
           H (int) = hop size in samples
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst (numpy array) = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue (numpy array) = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
           K is the number of frames
    """
    # Analysis parameters: Modify values of the parameters marked XX
    M = 3298                                    # Window size in samples
    
    ### Go through the code below and understand it, do not modify anything ###    
    H = 128                                     # Hop size in samples
    N = int(pow(2, np.ceil(np.log2(M))))        # FFT Size, power of 2 larger than M
    t = -80.0                                   # threshold
    window = 'blackman'                         # Window type
    maxnSines = 2                               # Maximum number of sinusoids at any time frame
    minSineDur = 0.0                            # minimum duration set to zero to not do tracking
    freqDevOffset = 30                          # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001                        # slope increase of minimum frequency deviation
    
    fs, x = UF.wavread(inputFile)               # read input sound
    w = get_window(window, M)                   # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)   # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    fTrackTrue = genTrueFreqTracks(tStamps)     # Generate the true frequency tracks
    tailF = 20                                 
    # Compute mean estimation error. 20 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF,:] - fTrackEst[tailF:-tailF,:]),axis=0)     
    print("Mean estimation error = " + str(meanErr) + ' Hz')      # Print the error to terminal    
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)  # stft from anal
    maxplotfreq = 1500.0
    binFreq = fs*np.arange(N*maxplotfreq/fs)/N
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:,:int(N * maxplotfreq / fs + 1)]),cmap = 'hot_r')
    plt.plot(tStamps,fTrackTrue, 'o-', color = 'c', linewidth=3.0)
    plt.plot(tStamps,fTrackEst, color = 'y', linewidth=2.0)
    plt.legend(('True f1', 'True f2', 'Estimated f1', 'Estimated f2'))
    plt.title('True and estimated frequency, windowsize = ' + str(M))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return M, H, tStamps, fTrackEst, fTrackTrue  # Output returned 
Exemple #7
0
def find_chirp_end_ms_sinusoidal_model(input_path):
    window = 'blackmanharris'
    M = 401
    N = 2048
    t = -90
    minSineDur = 0.5
    maxnSines = 30
    freqDevOffset = 3
    freqDevSlope = 0.000000
    H = 50
    minFreq = 100
    maxFreq = 950

    (fs, x) = UF.wavread(input_path)
    w = get_window(window, M)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope, minFreq, maxFreq)

    numFrames = int(tfreq[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)

    tfreq[tfreq<=0] = 0

    # tfreq[tfreq<=0] = np.nan
    # xmX, xpX = stft.stftAnal(x, w, N, H)
    #
    # plt.figure(1, figsize=(9.5, 6))
    # plt.plot(frmTime, tfreq)
    # binFreq = np.arange(N/2+1)*float(fs)/N
    # plt.pcolormesh(frmTime, binFreq, np.transpose(xmX))
    # plt.title('mX (piano.wav), M=1001, N=1024, H=256')
    # plt.autoscale(tight=True)
    # plt.show()

    max = 0
    max_i = 0
    for i in range(0, tfreq.shape[0]):
        sine_max_i = np.argmax(tfreq[i])
        sine_max = tfreq[i][sine_max_i]
        if sine_max > max:
            max = sine_max
            max_i = i

    end_of_chirp_ms = frmTime[max_i] * 1000

    # calculate the amount of missing frequency at the
    # end of the chirp to add that time back in
    missingFreq = 1000 - maxFreq
    time_fix_ms = 14 / 1000 * missingFreq * 1000
    return end_of_chirp_ms + time_fix_ms
Exemple #8
0
def mainlobeTracker(inputFile="../../sounds/sines-440-602-hRange.wav"):
    """
    Input:
           inputFile (string): wav file including the path
    Output:
           window (string): The window type used for analysis
           t (float) = peak picking threshold (negative dB)
           tStamps (numpy array) = A Kx1 numpy array of time stamps at which the frequency components were estimated
           fTrackEst = A Kx2 numpy array of estimated frequency values, one row per time frame, one column per component
           fTrackTrue = A Kx2 numpy array of true frequency values, one row per time frame, one column per component
    """
    # Analysis parameters: Modify values of the parameters marked XX
    window = "blackmanharris"  # Window type
    t = -93.0  # threshold (negative dB)

    ### Go through the code below and understand it, do not modify anything ###
    M = 2047  # Window size
    N = 4096  # FFT Size
    H = 128  # Hop size in samples
    maxnSines = 2
    minSineDur = 0.02
    freqDevOffset = 10
    freqDevSlope = 0.001
    # read input sound
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)  # Compute analysis window
    tStamps = genTimeStamps(x.size, M, fs, H)  # Generate the tStamps to return
    # analyze the sound with the sinusoidal model
    fTrackEst, mTrackEst, pTrackEst = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope
    )
    fTrackTrue = genTrueFreqTracks(tStamps)  # Generate the true frequency tracks
    tailF = 10
    # Compute mean estimation error. 50 frames at the beginning and end not used to compute error
    meanErr = np.mean(np.abs(fTrackTrue[tailF:-tailF, :] - fTrackEst[tailF:-tailF, :]), axis=0)
    print "Mean estimation error = " + str(meanErr) + " Hz"  # Print the error to terminal
    # Plot the estimated and true frequency tracks
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 900.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(tStamps, binFreq, np.transpose(mX[:, : N * maxplotfreq / fs + 1]), cmap="hot_r")
    plt.plot(tStamps, fTrackTrue, "o-", color="c", linewidth=3.0)
    plt.plot(tStamps, fTrackEst, color="y", linewidth=2.0)
    plt.legend(("True f1", "True f2", "Estimated f1", "Estimated f2"))
    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")
    plt.autoscale(tight=True)
    return window, t, tStamps, fTrackEst, fTrackTrue  # Output returned
Exemple #9
0
def sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope):
	"""
	Analysis of a sound using the sinusoidal plus residual model
	x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, 
	minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal
	"""

	# perform sinusoidal analysis
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	Ns = 512
	xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)    	# subtract sinusoids from original sound
	return tfreq, tmag, tphase, xr
Exemple #10
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
	Perform analysis/synthesis using the sinusoidal model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)
    return x, fs, tfreq, y
Exemple #11
0
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope, stocf):
	"""
	Analysis of a sound using the sinusoidal plus stochastic model
	x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, 
	minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	stocf: decimation factor used for the stochastic approximation
	returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual
	"""

	# perform sinusoidal analysis
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	Ns = 512
	xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)    	# subtract sinusoids from original sound
	stocEnv = STM.stochasticModelAnal(xr, H, H*2, stocf)            # compute stochastic model of residual
	return tfreq, tmag, tphase, stocEnv
Exemple #12
0
 def sine_model_analysis(self,
                         window_size=2047,
                         fft_size=4096,
                         hop_size=150,
                         threshold_db=-80,
                         min_sine_dur=0.15,
                         max_sines=15):
     window = np.blackman(window_size)
     self.fft_size = fft_size
     self.window_size = window_size
     self.hop_size = hop_size
     self.threshold_db = threshold_db
     self.min_sine_dur = min_sine_dur
     self.max_sines = max_sines
     self.stft_magnitudes, self.stft_phases = STFT.stftAnal(
         self.signal, window, fft_size, hop_size)
     self.frequencies, self.magnitudes, self.phases = SM.sineModelAnal(
         self.signal, self.sample_rate, window, fft_size, hop_size,
         threshold_db, max_sines, min_sine_dur)
     self.compute_lines()
Exemple #13
0
def spsModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset,
                 freqDevSlope, stocf):
    """
	Analysis of a sound using the sinusoidal plus stochastic model
	x: input sound, fs: sampling rate, w: analysis window; N: FFT size, t: threshold in negative dB, 
	minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	stocf: decimation factor used for the stochastic approximation
	returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual
	"""

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)
    Ns = N  #512
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase,
                            fs)  # subtract sinusoids from original sound
    #stocEnv = STM.stochasticModelAnal(xr, H, H*2, stocf)            # compute stochastic model of residual
    stocEnv = STM.stochasticModelAnal(xr, H, N, stocf)
    return tfreq, tmag, tphase, stocEnv
Exemple #14
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, 
					maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
	"""
	Perform analysis/synthesis using the sinusoidal model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	"""
		
	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	fs, x = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# analyze the sound with the sinusoidal model
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

	# synthesize the output sound from the sinusoidal representation
	y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

	# output sound file name
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav'

	# write the synthesized sound obtained from the sinusoidal synthesis
	UF.wavwrite(y, fs, outputFile)
	return x,fs,tfreq,y
Exemple #15
0
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Input:
            inputFile (string) = wav file including the path
    Output: 
            return True
            Discuss on the forum!
    """
    window='hamming'                            # Window type
    M=3001                                      # Window size in sample
    N=4096                                      # FFT Size
    t=-80                                       # Threshold                
    minSineDur=0.02                             # minimum duration of a sinusoid
    maxnSines=15                                # Maximum number of sinusoids at any time frame
    freqDevOffset=10                            # minimum frequency deviation at 0Hz
    freqDevSlope=0.001                          # slope increase of minimum frequency deviation
    Ns = 512                                    # size of fft used in synthesis
    H = 128                                     # hop size (has to be 1/4 of Ns)
    
    fs, x = UF.wavread(inputFile)               # read input sound
    w = get_window(window, M)                   # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')
                
    # plot the sinusoidal frequencies
    plt.subplot(3,1,2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        tfreq[tfreq<=0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../sounds/bendir.wav"))
w = np.hamming(2001)
N = 2048
H = 200
t = -80
minSineDur = 0.02
maxnSines = 150
freqDevOffset = 10
freqDevSlope = 0.001
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

plt.figure(1, figsize=(9.5, 7))
maxplotfreq = 800.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(np.diff(pX[:, : maxplotbin + 1], axis=1)))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color="k", lw=1.5)
plt.autoscale(tight=True)
plt.title("pX + sinusoidal tracks (bendir.wav)")
def analysis(inputFile='../../sounds/mridangam.wav',
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02):
    """
	Analyze a sound with the sine model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	returns inputFile: input file name; fs: sampling rate of input file,
	        tfreq, tmag: sinusoidal frequencies and magnitudes
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the sines without original phases
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_sineModel.wav'

    # write the sound resulting from the inverse stft
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, 
					maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
	"""
	Perform analysis/synthesis using the sinusoidal model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	"""
		
	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	fs, x = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# analyze the sound with the sinusoidal model
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

	# synthesize the output sound from the sinusoidal representation
	y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

	# output sound file name
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav'

	# write the synthesized sound obtained from the sinusoidal synthesis
	UF.wavwrite(y, fs, outputFile)

	# create figure to show plots
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 5000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')
				
	# plot the sinusoidal frequencies
	plt.subplot(3,1,2)
	if (tfreq.shape[1] > 0):
		numFrames = tfreq.shape[0]
		frmTime = H*np.arange(numFrames)/float(fs)
		tfreq[tfreq<=0] = np.nan
		plt.plot(frmTime, tfreq)
		plt.axis([0, x.size/float(fs), 0, maxplotfreq])
		plt.title('frequencies of sinusoidal tracks')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/'))

import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../sounds/flute-A4.wav'))
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0
maxplotbin = int(N*maxplotfreq/fs)
numFrames = int(mX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)                             
binFreq = np.arange(maxplotbin+1)*float(fs)/N                         
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:maxplotbin+1]))
plt.autoscale(tight=True)
  
tracks = tfreq*np.less(tfreq, maxplotfreq)
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sinusoidal tracks (flute-A4.wav)')
import utilFunctions as UF
import sineTransformations as SMT


(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns/4
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82, .858, 1.2, .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0, 1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])          
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, fs, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x, 'b')
plt.axis([0,x.size/float(fs),min(x),max(x)])
plt.title('x (mridangam.wav)')                        

plt.subplot(4,1,2)
numFrames = int(tfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
x1 = x[:int(1.49*fs)]
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns//4
sfreq, smag, sphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([.01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02, .858, 1.1, .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0])          
L = sfreq[:,0].size                                    # number of input frames
maxInTime = max(timeScale[::2])                      # maximum value used as input times
maxOutTime = max(timeScale[1::2])                    # maximum value used in output times
outL = int(L*maxOutTime/maxInTime)                     # number of output frames
inFrames = L*timeScale[::2]/maxInTime                # input time values in frames
outFrames = outL*timeScale[1::2]/maxOutTime          # output time values in frames
timeScalingEnv = interp1d(outFrames, inFrames, fill_value=0)    # interpolation function
indexes = timeScalingEnv(np.arange(outL))              # generate frame indexes for the output
ysfreq = sfreq[int(round(indexes[0])),:]                    # first output frame
ysmag = smag[int(round(indexes[0])),:]                      # first output frame
for l in indexes[1:]:                                  # generate frames for output sine tracks
	ysfreq = np.vstack((ysfreq, sfreq[int(round(l)),:]))  
	ysmag = np.vstack((ysmag, smag[int(round(l)),:])) 
def sms_analysis_from_file(input_filename, maxNumSines=SMS.maxNumSines):
    x, Fs = librosa.load(input_filename)
    tfreq, tmag, tphase = SM.sineModelAnal(x, Fs, SMS.w, SMS.N_FFT, SMS.H,
                                           SMS.t, maxNumSines, SMS.minSineDur,
                                           SMS.freqDevOffset, SMS.freqDevSlope)
    return (tfreq, tmag, tphase, Fs)
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):

    # ------- analysis parameters -------------------

    # inputFile: input sound file (monophonic with sampling rate of 44100)
    # window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    # M: analysis window size
    # N: fft size (power of two, bigger or equal than M)
    # t: magnitude threshold of spectral peaks
    # minSineDur: minimum duration of sinusoidal tracks
    # maxnSines: maximum number of parallel sinusoids
    # freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    # freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    # stocf: decimation factor used for the stochastic approximation

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from original sound
    Ns = 512
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)

    # compute stochastic model of residual
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # synthesize stochastic component
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic
    y = yst[: min(yst.size, ys.size)] + ys[: min(yst.size, ys.size)]

    # output sound file (monophonic with sampling rate of 44100)
    outputFileSines = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel_stochastic.wav"
    outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_spsModel.wav"

    # write sounds files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # plot stochastic component
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, : sizeEnv * maxplotfreq / (0.5 * fs) + 1]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    sines = tfreq * np.less(tfreq, maxplotfreq)
    sines[sines == 0] = np.nan
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90, 
	minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
	"""
	Analyze a sound with the sine model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	returns inputFile: input file name; fs: sampling rate of input file,
	        tfreq, tmag: sinusoidal frequencies and magnitudes
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# compute the sine model of the whole sound
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

	# synthesize the sines without original phases
	y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav'

	# write the sound resulting from the inverse stft
	UF.wavwrite(y, fs, outputFile)

	# create figure to show plots
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 5000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')
		
	# plot the sinusoidal frequencies
	if (tfreq.shape[1] > 0):
		plt.subplot(3,1,2)
		tracks = np.copy(tfreq)
		tracks = tracks*np.less(tracks, maxplotfreq)
		tracks[tracks<=0] = np.nan
		numFrames = int(tracks[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs)
		plt.plot(frmTime, tracks)
		plt.axis([0, x.size/float(fs), 0, maxplotfreq])
		plt.title('frequencies of sinusoidal tracks')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)

	return inputFile, fs, tfreq, tmag
Exemple #25
0
def exploreSineModel(inputFile='../../sounds/multiSines.wav'):
    """
    Input:
            inputFile (string) = wav file including the path
    Output: 
            return True
            Discuss on the forum!
    """
    # window='hamming'                            # Window type
    window='blackmanharris'                            # Window type
    # M=3001                                      # Window size in sample
    M=3529                                      # Window size in sample
    #M=4095                                      # Window size in sample
    N=4096                                      # FFT Size
    #N=8192                                      # FFT Size
    # N=8192                                      # FFT Size
    # t=-80                                       # Threshold
    t=-50                                       # Threshold
    #minSineDur=0.02                             # minimum duration of a sinusoid
    minSineDur=0.01                             # minimum duration of a sinusoid
    maxnSines=15                                # Maximum number of sinusoids at any time frame
    #maxnSines=9                                # Maximum number of sinusoids at any time frame
    freqDevOffset=10                            # minimum frequency deviation at 0Hz
    #freqDevOffset=20                            # minimum frequency deviation at 0Hz
    freqDevSlope=0.001                          # slope increase of minimum frequency deviation
    # Ns = 512                                    # size of fft used in synthesis
    # H = 128                                     # hop size (has to be 1/4 of Ns)
    Ns = 512                                    # size of fft used in synthesis
    H = Ns / 4                                  # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)               # read input sound
    w = get_window(window, M)                   # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # SNR calculation
    x1 = x[:len(y)]
    e_signal = calculate_energy(x1)
    e_error = calculate_energy(x1 - y)
    snr = calculate_snr(e_signal, e_error)
    print("SNR {}".format(snr))
    errorFile = os.path.basename(inputFile)[:-4] + '_sineModel_error.wav'
    UF.wavwrite(x1 - y, fs, errorFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')
                
    # plot the sinusoidal frequencies
    plt.subplot(3,1,2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        tfreq[tfreq<=0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.plot(np.arange(y.size)/float(fs), abs(x1 - y))  # error
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, 
	minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):

	# ------- analysis parameters -------------------

	# inputFile: input sound file (monophonic with sampling rate of 44100)
	# window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	# M: analysis window size 
	# N: fft size (power of two, bigger or equal than M)
	# t: magnitude threshold of spectral peaks 
	# minSineDur: minimum duration of sinusoidal tracks
	# maxnSines: maximum number of parallel sinusoids
	# freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	# freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# --------- computation -----------------

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# perform sinusoidal analysis
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
		
	# subtract sinusoids from original 
	xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)
		
	# compute spectrogram of residual
	mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)
		
	# synthesize sinusoids
	ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

	# sum sinusoids and residual
	y = xr[:min(xr.size, ys.size)]+ys[:min(xr.size, ys.size)]

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel_sines.wav'
	outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel_residual.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sprModel.wav'

	# write sounds files for sinusoidal, residual, and the sum
	UF.wavwrite(ys, fs, outputFileSines)
	UF.wavwrite(xr, fs, outputFileResidual)
	UF.wavwrite(y, fs, outputFile)

	# --------- plotting --------------------

	# create figure to show plots
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 5000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')
		
	# plot the magnitude spectrogram of residual
	plt.subplot(3,1,2)
	maxplotbin = int(N*maxplotfreq/fs)
	numFrames = int(mXr[:,0].size)
	frmTime = H*np.arange(numFrames)/float(fs)                       
	binFreq = np.arange(maxplotbin+1)*float(fs)/N                         
	plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:,:maxplotbin+1]))
	plt.autoscale(tight=True)
		
	# plot the sinusoidal frequencies on top of the residual spectrogram
	tracks = tfreq*np.less(tfreq, maxplotfreq)
	tracks[tracks<=0] = np.nan
	plt.plot(frmTime, tracks, color='k')
	plt.title('sinusoidal tracks + residual spectrogram')
	plt.autoscale(tight=True)

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')


	plt.tight_layout()
	plt.show()
Exemple #27
0
import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns / 4
sfreq, smag, sphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines,
                                       minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([
    .01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02, .858, 1.1,
    .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0
])
L = sfreq[:, 0].size  # number of input frames
maxInTime = max(timeScale[::2])  # maximum value used as input times
maxOutTime = max(timeScale[1::2])  # maximum value used in output times
outL = int(L * maxOutTime / maxInTime)  # number of output frames
inFrames = L * timeScale[::2] / maxInTime  # input time values in frames
outFrames = outL * timeScale[1::2] / maxOutTime  # output time values in frames
timeScalingEnv = interp1d(outFrames, inFrames,
                          fill_value=0)  # interpolation function
indexes = timeScalingEnv(
    np.arange(outL))  # generate frame indexes for the output
ysfreq = sfreq[round(indexes[0]), :]  # first output frame
def exploreSineModel(inputFile='multiSines.wav'):
    """
    Input:
            inputFile (string) = wav file including the path
    Output: 
            return True
    """
    window = 'hamming'  # Window type
    M = 2001  # Window size in sample
    N = 2048  # FFT Size
    t = -80  # Threshold
    minSineDur = 0.02  # minimum duration of a sinusoid
    maxnSines = 150  # Maximum number of sinusoids at any time frame
    freqDevOffset = 10  # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001  # slope increase of minimum frequency deviation
    Ns = 512  # size of fft used in synthesis
    H = 128  # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)  # read input sound
    w = get_window(window, M)  # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
Exemple #29
0
import stft as STFT
import sineModel as SM
import utilFunctions as UF

(fs, x) = UF.wavread(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../sounds/flute-A4.wav'))
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                       minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sinusoidal tracks (flute-A4.wav)')
Exemple #30
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
	Perform analysis/synthesis using the sinusoidal model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0   
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)