Ejemplo n.º 1
0
def onset_test(noise_ffts, noise_phs, ind, mY, pY, M, H, recType, outDir, name,
               fs):
    """
    Splice noise frames into a spectrogram, resynthesize it and write a WAV.

    For every frame index i in ``ind`` the six frames ``i - 3 .. i + 2`` of
    ``mY`` and ``pY`` are overwritten with frames 97..102 of ``noise_ffts``
    and ``noise_phs``. The patched spectrogram is then overlap-added into a
    time-domain signal, which is written to ``name + '.wav'`` and returned.

    noise_ffts, noise_phs: magnitude / phase spectra supplying the
        replacement frames (rows 97..102 are read)
    ind: iterable of frame indices to patch (indices below 3 are not handled)
    mY, pY: magnitude / phase spectra, one frame per row; MODIFIED IN PLACE
    M: window size, H: hop-size
    recType, outDir: sub-directories appended to the hard-coded output root
    name: output file name without the '.wav' extension
    fs: sampling rate passed to the WAV writer
    returns y: output sound

    NOTE: this function calls os.chdir(), so the working directory of the
    whole process is changed as a side effect.
    """
    for frame in ind:
        mY[frame - 3:frame + 3, :] = noise_ffts[97:103, :]
        pY[frame - 3:frame + 3, :] = noise_phs[97:103, :]

    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor
    nFrames = mY[:, 0].size  # number of frames
    y = np.zeros(nFrames * H + hM1 + hM2)  # initialize output array
    pin = hM1
    for i in range(nFrames):  # iterate over all frames
        y1 = DFT.dftSynth(mY[i, :], pY[i, :], M)  # compute idft
        y[pin - hM1:pin + hM2] += H * y1  # overlap-add into the output
        pin += H  # advance sound pointer

    # Drop the first hM2 and the last hM1 samples of the buffer in one slice
    # (same samples the two np.delete() calls used to remove).
    y = y[hM2:y.size - hM1]

    os.chdir('/home/tgoodall/sms-tools/software/models/Overtone_Arrays/' +
             recType + '/' + outDir)
    outputFile = name + '.wav'
    UF.wavwrite(y, fs, outputFile)
    return y
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to disk.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The samples are reduced with hopSamples(samples, M) and written to
    "<input base name>_downsampled.wav" in the current working directory.
    Progress messages are printed along the way. Returns nothing.
    """
    print("Reading WAV...")
    (sampleRate, samples) = wavread(inputFile)

    print("Sample Rate:",sampleRate,"kHz/16")
    print("Sample Size:",len(samples))

    base = os.path.basename(inputFile)
    outputFile = "%s_downsampled.wav" % base.replace(".wav", "")

    print("Downsampling...")
    downsampled = hopSamples(samples, M)

    print("Writing downsampled WAV...")

    # Decimating by M divides the sampling rate by M. The original code
    # computed a new rate but then wrote the file with the input rate.
    # Integer division keeps the rate an int, as a WAV header requires.
    newRate = sampleRate // M
    wavwrite(downsampled, newRate, outputFile)

    print("Done:", outputFile)
Ejemplo n.º 3
0
def computeModel(inputFile, B, M, window = 'hanning', t = -90):
    """
    Run the multi-resolution sinusoidal model on a sound file, save and plot it.

    inputFile: input sound file
    B: one entry per analysis band; also used to derive the FFT sizes
    M: analysis window sizes, indexed in step with B
    window: analysis window type passed to get_window
    t: threshold forwarded to SMMR.sineModelMultiRes

    A copy of the input and the synthesized sound are written into
    'output_sounds/', then both signals are drawn in one figure.
    """
    nBands = len(B)

    fs, x = UF.wavread(inputFile)

    # one analysis window per band
    w = [get_window(window, M[k]) for k in range(nBands)]

    # FFT size per band: the power of two at or above each entry of B
    # NOTE(review): N is derived from B rather than from M - confirm intended
    N = (2**np.ceil(np.log2(B))).astype(int)

    y_combined = SMMR.sineModelMultiRes(x, fs, w, N, t, B)

    # output names are built from the input's base name
    baseName = os.path.basename(inputFile)
    inputCopy = 'output_sounds/' + baseName
    synthesized = 'output_sounds/' + baseName[:-4] + '_sineModelMultiRes.wav'

    UF.wavwrite(x, fs, inputCopy)
    UF.wavwrite(y_combined, fs, synthesized)

    # overlay input and output waveforms
    plt.figure()
    for signal in (x, y_combined):
        plt.plot(signal)
    plt.show()
Ejemplo n.º 4
0
def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512):
    """
    Analyze a sound with the STFT, resynthesize it and save the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)
    returns x, fs, mX, pX, y: input sound, sampling rate, the two arrays
        returned by STFT.stftAnal, and the resynthesized sound
    """
    fs, x = UF.wavread(inputFile)

    # analysis with the requested window, then inverse STFT
    mX, pX = STFT.stftAnal(x, fs, get_window(window, M), N, H)
    y = STFT.stftSynth(mX, pX, M, H)

    # the output keeps the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stft.wav')
    return x, fs, mX, pX, y
Ejemplo n.º 5
0
def getJawaab(ipFile = '../dataset/testInputs/testInput_1.wav', ipulsePos = getPulsePosFromAnn('../dataset/testInputs/testInput_1.csv'), strokeModels = None, oFile = './tablaOutput.wav', randomFlag = 1):
    """
    Generate a stroke composition for an input recording and optionally render it.

    ipFile: input audio file
    ipulsePos: pulse positions of the input (the default is read from the
        annotation file once, when this function is defined)
    strokeModels: trained stroke models; must not be None
    oFile: output audio file, or None to skip audio rendering
    randomFlag: 1 generates a random composition, anything else a composition
        similar to the input

    returns (opulsePos, strokeSeq, tStamps, oFile); all four are None when
    strokeModels is None.
    """
    # Without trained models nothing can be generated. The original code
    # assigned `ts` here but returned `tStamps`, which raised instead of
    # returning the intended tuple of Nones.
    if strokeModels is None:
        print("Train models first before calling getJawaab() ...")
        return None, None, None, None

    print("Getting jawaab...")
    pulsePeriod = np.median(np.diff(ipulsePos))
    print(pulsePeriod)
    fss, audioIn = UF.wavread(ipFile)
    pieceDur = len(audioIn)/params.Fs
    if randomFlag == 1:
        strokeSeq, tStamps, opulsePos = genRandomComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels)
    else:
        invCmat = getInvCovarianceMatrix(strokeModels)
        strokeSeq, tStamps, opulsePos = genSimilarComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels, iAudioFile = ipFile, iPos = ipulsePos,invC = invCmat)
    print(strokeSeq)
    print(tStamps)
    print(opulsePos)
    if oFile is not None:
        audio = genAudioFromStrokeSeq(strokeModels,strokeSeq,tStamps)
        # scale by the peak; the 0.01 offset keeps the divisor away from zero
        audio = audio/(np.max(audio) + 0.01)
        UF.wavwrite(audio, params.Fs, oFile)
    return opulsePos, strokeSeq, tStamps, oFile
def transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling = np.array([0, 2.0, 1, .3]),
    freqStretching = np.array([0, 1, 1, 1.5]), timbrePreservation = 1,
    timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Transform harmonic analysis values, synthesize the sound, save and plot it.

    inputFile: name of input file (used to build the output file name)
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    freqStretching: frequency stretching factors, in time-value pairs
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # frequency transformation first, then time scaling of its result
    scaledFreq, scaledMag = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)
    yhfreq, yhmag = ST.sineTimeScaling(scaledFreq, scaledMag, timeScaling)

    # synthesis (an empty array is passed in the phase position)
    y = SM.sineModelSynth(yhfreq, yhmag, np.array([]), Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModelTransformation.wav')

    # ---- plotting ----
    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0  # frequency range to plot

    # top panel: transformed tracks, values at/above the limit or <= 0 hidden
    plt.subplot(2, 1, 1)
    tracks = yhfreq * np.less(yhfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    frameTimes = H * np.arange(int(tracks[:, 0].size)) / float(fs)
    plt.plot(frameTimes, tracks, color='k')
    plt.title('transformed harmonic tracks')
    plt.autoscale(tight=True)

    # bottom panel: output waveform
    plt.subplot(2, 1, 2)
    sampleTimes = np.arange(y.size) / float(fs)
    plt.plot(sampleTimes, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling = np.array([0, 2.0, 1, .3]),
    timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Transform sinusoidal analysis values, synthesize the sound, save and plot it.

    inputFile: name of input file (used to build the output file name)
    fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # frequency scaling, then time scaling of the scaled tracks
    scaledFreq = ST.sineFreqScaling(tfreq, freqScaling)
    ytfreq, ytmag = ST.sineTimeScaling(scaledFreq, tmag, timeScaling)

    # synthesis (an empty array is passed in the phase position)
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModelTransformation.wav')

    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0  # frequency range to plot

    # the track panel is drawn only when there is at least one track column
    hasTracks = ytfreq.shape[1] > 0
    if hasTracks:
        plt.subplot(2, 1, 1)
        tracks = ytfreq * np.less(ytfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frameTimes = H * np.arange(int(tracks[:, 0].size)) / float(fs)
        plt.plot(frameTimes, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # output waveform
    plt.subplot(2, 1, 2)
    sampleTimes = np.arange(y.size) / float(fs)
    plt.plot(sampleTimes, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def main(inputFile='../../sounds/ocean.wav', H=256, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size
    stocf: decimation factor used for the stochastic approximation
    """
    fs, x = UF.wavread(inputFile)

    # analysis followed by resynthesis
    mYst = STM.stochasticModelAnal(x, H, stocf)
    y = STM.stochasticModelSynth(mYst, H)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    def plotWaveform(row, signal, title=None):
        # draw one time-domain panel of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size)/float(fs), signal)
        plt.axis([0, signal.size/float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        if title is not None:
            plt.title(title)

    plt.figure(figsize=(12, 9))

    # row 1: input sound
    plotWaveform(1, x, 'input sound: x')

    # row 2: stochastic representation
    plt.subplot(3, 1, 2)
    frmTime = H*np.arange(int(mYst[:, 0].size))/float(fs)
    binFreq = np.arange(stocf*H)*float(fs)/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # row 3: output sound (no title)
    plotWaveform(3, y)

    plt.tight_layout()
    plt.show()
Ejemplo n.º 9
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output file name is the full input name with '_downsampled.wav'
    appended. Returns nothing.
    """
    (fs, x) = wavread(inputFile)
    # The original call passed only a file name and M as the sampling rate,
    # so no samples were ever written. Write the decimated samples at the
    # reduced (integer) rate instead, in (samples, rate, filename) order.
    wavwrite(x[::M], fs // M, inputFile + '_downsampled.wav')
Ejemplo n.º 10
0
def extractHarmSpec(inputFile='../../sounds/ocean.wav',
                    H=256,
                    N=512,
                    stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    """
    fs, x = UF.wavread(inputFile)

    # stochastic envelope of the input and the sound rebuilt from it
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    rate = float(fs)
    plt.figure(figsize=(12, 9))

    # panel 1: input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, x.size / rate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: stochastic representation
    plt.subplot(3, 1, 2)
    frameCount = int(stocEnv[:, 0].size)
    frmTime = H * np.arange(frameCount) / rate
    binFreq = np.arange(stocf * N / 2) * rate / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # panel 3: output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    plt.show()
Ejemplo n.º 11
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to 'test.wav'.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    # The decimated signal has 1/M of the samples per second, so the file
    # must be written with the reduced rate (kept an int for the WAV header).
    wavwrite(y, fs // M, 'test.wav')
Ejemplo n.º 12
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output is "<input path without extension>_downsampled.wav".
    """
    sampleRate, samples = wavread(inputFile)

    decimated = hopSamples(samples, M)
    newRate = int(sampleRate / M)
    stem = os.path.splitext(inputFile)[0]
    wavwrite(decimated, newRate, stem + "_downsampled.wav")
Ejemplo n.º 13
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output "<input base name>_downsampled.wav" is written to the
    current working directory.
    """
    fs, x = wavread(inputFile)
    # was `hopAsmples`, a typo that raised NameError on every call
    y = hopSamples(x, M)
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    wavwrite(y, fs // M, os.path.basename(inputFile)[0:-4] + '_downsampled.wav')
Ejemplo n.º 14
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to "result.wav".

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    # `fs // M` always yields an int for int inputs, whereas floor(fs / M)
    # returns a float with numpy.floor (and with math.floor on Python 2).
    wavwrite(y, fs // M, "result.wav")
Ejemplo n.º 15
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output file name is the full input name with "_downsampled.wav"
    appended.
    """
    (fs, data) = wavread(inputFile)
    newData = data[::M]
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    wavwrite(newData, fs // M, inputFile + "_downsampled.wav")
Ejemplo n.º 16
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output "<input base name>_downsampled.wav" is written to the
    current working directory.
    """
    fs, x = wavread(inputFile)
    decimated = hopSamples(x, M)
    reducedRate = int(fs / M)
    # base name with its last four characters (the extension) removed
    outputName = os.path.basename(inputFile)[0:-4] + '_downsampled.wav'
    wavwrite(decimated, reducedRate, outputName)
Ejemplo n.º 17
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output is the input path with a trailing '.wav' (if any) replaced
    by '_downsampled.wav'.
    """
    fs, x = wavread(inputFile)
    new_x = hopSamples(x, M)
    # The original pattern '.wav' had an unescaped dot and no anchor, so it
    # matched any character followed by "wav" anywhere in the path (for
    # example inside a directory name). Strip only a literal trailing
    # extension and always append the suffix, so the input file can never be
    # overwritten.
    outputFile = re.sub(r'\.wav$', '', inputFile) + '_downsampled.wav'
    wavwrite(new_x, fs // M, outputFile)
Ejemplo n.º 18
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output file name is printed and is the input path with its '.wav'
    extension replaced by '_downsampled.wav'.
    """
    # str.rstrip('.wav') removes any trailing '.', 'w', 'a' or 'v'
    # characters, not the suffix, so e.g. "wave.wav" lost part of its name.
    # Remove the extension only when it is really there.
    if inputFile.endswith('.wav'):
        stem = inputFile[:-4]
    else:
        stem = inputFile
    outputFile = stem + '_downsampled.wav'
    print(outputFile)
    (sr, data) = wavread(inputFile)
    downsampleData = hopSamples(data, M)
    # integer division: `sr / M` is a float on Python 3, WAV rates are ints
    wavwrite(downsampleData, sr // M, outputFile)
Ejemplo n.º 19
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output is the input path with its last four characters replaced by
    "_downsampled.wav".
    """
    fs, samples = wavread(inputFile)
    downsampled = hopSamples(samples, M)
    # The decimated signal has 1/M of the samples per second, so it is
    # written with the reduced rate (kept an int for the WAV header).
    wavwrite(downsampled, fs // M, "{}_downsampled.wav".format(inputFile[:-4]))
    
Ejemplo n.º 20
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output keeps the input's extension and inserts '_downsampled'
    before it.
    """
    fileNameParts = os.path.splitext(inputFile)
    fs, x = wavread(inputFile)
    # The decimated signal has 1/M of the samples per second, so it is
    # written with the reduced rate (kept an int for the WAV header).
    wavwrite(hopSamples(x, M), fs // M,
             fileNameParts[0] + '_downsampled' + fileNameParts[1])
Ejemplo n.º 21
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output name is the input name with '.wav' replaced by
    '_downsampled.wav'.
    """
    samplingRate, samples = wavread(inputFile)
    downsampledSamples = hopSamples(samples, M)
    # The original passed M itself as the sampling rate; the rate of the
    # decimated signal is the input rate divided by M.
    wavwrite(downsampledSamples, samplingRate // M,
             inputFile.replace('.wav', '_downsampled.wav'))
Ejemplo n.º 22
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output name is the input name with '.wav' replaced by
    '_downsampled.wav'.
    """
    (fs, x) = wavread(inputFile)
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    dsfs = fs // M
    new_array = x[::M]
    downsampled = inputFile.replace('.wav', '_downsampled.wav')
    wavwrite(new_array, dsfs, downsampled)
Ejemplo n.º 23
0
def testAudio ():
    """
    Write a 2-second sine tone to "testa.wav" at a 44100 Hz sampling rate.

    NOTE(review): the tone frequency (30000 Hz) is above half the sampling
    rate, so the sampled signal is aliased - confirm this is intended.
    """
    fs = 44100
    toneHz = 30000
    seconds = 2.0

    # sample indices 0 .. seconds*fs - 1, then the sine evaluated at each
    n = np.arange(seconds*fs)
    tone = np.sin(2*np.pi*toneHz*n/fs)

    wavwrite(tone, fs, "testa.wav")
Ejemplo n.º 24
0
def downsampleAudio(inputFile,M):
    """
    Downsample a wav file by keeping every M-th sample and write the result.

    inputFile: file name of the wav file (including path)
    M: downsampling factor (positive integer)

    The output goes to '../../sounds/output_sounds/downsampled_<name>', where
    <name> is inputFile without its first 13 characters.
    NOTE(review): 13 is the length of '../../sounds/'; other input paths give
    odd output names - confirm callers always pass that prefix.
    """
    (fs, x) = UF.wavread(inputFile)
    # (a discarded `x.astype(int)` call was removed: it had no effect)
    x_array_slice = np.array(x)[::M]

    outputFile_name = 'downsampled_' + inputFile[13:]
    outputFile_path = '../../sounds/output_sounds/'
    name_and_path = outputFile_path + outputFile_name

    # The decimated signal has 1/M of the samples per second, so it is
    # written with the reduced rate (kept an int for the WAV header).
    UF.wavwrite(x_array_slice, fs // M, name_and_path)
Ejemplo n.º 25
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path); must
            contain a '.' separating the extension
        M: downsampling factor (positive integer)

    The output keeps the input's extension and inserts "_downsampled"
    before it.
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    basename, extension = inputFile.rsplit(".", 1)
    outputFile = basename + "_downsampled." + extension
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    wavwrite(y, fs // M, outputFile)
Ejemplo n.º 26
0
def writeSound(y, fs, name):
    '''
    Write the sound y to the file `name`, picking the writer from fs.

    y: sound samples, fs: sampling rate, name: output file name
    When fs is 44100 the utilFunctions writer (UF.wavwrite) is used;
    for any other rate the sound is written through sf.write with
    subtype "PCM_24".
    NOTE(review): the writer is selected by sampling rate, not by the bit
    depth of the sound - confirm this is the intended criterion.
    '''
    if fs != 44100:
        sf.write(name, y, fs, subtype="PCM_24")
        return
    UF.wavwrite(y, fs, name)
Ejemplo n.º 27
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output name (printed before writing) is the input name with '.wav'
    replaced by '_downsampled.wav'.
    """
    # read the file once; the original decoded it twice to get both values
    fs, x = wavread(inputFile)
    a = hopSamples(x, M)
    file_name = inputFile.replace('.wav', '_downsampled.wav')
    print(file_name)
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    wavwrite(a, fs // M, file_name)
Ejemplo n.º 28
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    A message is printed when the input rate is not 44100; processing
    continues regardless. The output is the input path with its last four
    characters replaced by "_downsampled.wav".
    """
    fs, x = wavread(inputFile)
    # `<>` and the print statement are Python-2-only syntax; `!=` and a
    # single-argument print() behave the same on Python 2 and 3.
    if fs != 44100:
        print("Sample rate must be 44100.")
    ds = hopSamples(x, M)
    # integer division matches the Python 2 result and stays an int on 3
    wavwrite(ds, fs // M, inputFile[:-4] + "_downsampled.wav")
Ejemplo n.º 29
0
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it, save and plot it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)

    The figure is shown without blocking (plt.show(block=False)).
    """
    fs, x = UF.wavread(inputFile)

    # stochastic envelope of the input and the sound rebuilt from it
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    def plotWaveform(row, signal, title=None):
        # draw one time-domain panel of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size)/float(fs), signal)
        plt.axis([0, signal.size/float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        if title is not None:
            plt.title(title)

    plt.figure(figsize=(12, 9))

    # row 1: input sound
    plotWaveform(1, x, 'input sound: x')

    # row 2: stochastic representation
    plt.subplot(3, 1, 2)
    frmTime = H*np.arange(int(stocEnv[:, 0].size))/float(fs)
    binFreq = np.arange(stocf*(N/2+1))*float(fs)/(stocf*N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # row 3: output sound (no title)
    plotWaveform(3, y)

    plt.tight_layout()
    plt.show(block=False)
Ejemplo n.º 30
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output is written to 'test<M>_downsampled.wav' in the current
    working directory.
    """
    (sampleRate, dataArray) = wavread(inputFile)
    downSampleByM = dataArray[::M]
    # integer division: `sampleRate/M` is a float on Python 3, WAV rates
    # are ints
    outputRate = sampleRate // M
    wavwrite(downSampleByM, outputRate, 'test%s_downsampled.wav' %(M))

    return
Ejemplo n.º 31
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result
    to 'output_downsampled.wav'.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (fs, x) = wavread(inputFile)
    # keep every M-th sample
    x = x[::M]
    # New sampling rate. The original divided by float(M), which always
    # produced a float; a WAV header stores the rate as an integer.
    fs = fs // M
    wavwrite(x,fs,'output_downsampled.wav')
Ejemplo n.º 32
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output is the input path up to its last '.' followed by
    "_downsampled.wav".
    """
    # everything before the final dot of the input path
    dotAt = inputFile.rfind('.')
    target = inputFile[0:dotAt] + "_downsampled.wav"

    rate, samples = wavread(inputFile)
    rate = int(rate / M)
    decimated = hopSamples(samples, M)

    wavwrite(decimated, rate, target)
Ejemplo n.º 33
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by keeping every M-th sample and write the result
    to 'downsampled.wav'.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The input's sample rate and sample count are printed.
    """
    print('Reading file...')
    fs, x = wavread(inputFile)
    print('Sample rate: ', fs)
    print('Number of samples: ', len(x))

    y = x[::M]
    # integer division: `fs / M` is a float on Python 3, WAV rates are ints
    newFs = fs // M
    wavwrite(y, newFs, 'downsampled.wav')
Ejemplo n.º 34
0
def main(inputFile='../../sounds/vignesh.wav',
         window='blackman',
         M=1201,
         N=2048,
         t=-90,
         minSineDur=0.1,
         nH=100,
         minf0=130,
         maxf0=300,
         f0et=7,
         harmDevSlope=0.01):
    """
    Analysis and synthesis using the harmonic model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x, fs, hfreq, y: input sound, sampling rate, harmonic
        frequencies from the analysis, and the synthesized sound
    """
    Ns, H = 512, 128  # synthesis fft size and hop size (H has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic analysis of the input, then synthesis from the result
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # output keeps the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModel.wav')
    return x, fs, hfreq, y
Ejemplo n.º 35
0
def _writeSineModelExample(inputFile, W, M, N, B, T, suffix):
    # Synthesize one example with the given settings and write it next to the input.
    fs, x = UF.wavread(inputFile)
    Ns = 512
    best = Best()
    y = best.calculateAndUpdate(x, fs, Ns, np.array(W), np.array(M),
                                np.array(N), np.array(B), np.array(T))
    outputFile = inputFile[:-4] + suffix
    # single-argument print(): same output on Python 2 and Python 3
    print('-> ' + outputFile)
    UF.wavwrite(y, fs, outputFile)


def writeExampleFiles():
    """
    A convenience function: writes out example files, some of them with optimal parameters found by exploreSineModelMultiRes()

    Four files are produced, in this order, next to their inputs:
      orchestra, single-window settings -> '_optimizedSineModel.wav'
      strings,   three-window settings  -> '_optimizedSineModel.wav'
      orchestra, three-window settings  -> '_nonOptimizedSineModel.wav'
      strings,   single-window settings -> '_nonOptimizedSineModel.wav'
    """
    orchestra = '../../sounds/orchestra.wav'
    strings = '../../sounds/121061__thirsk__160-link-strings-2-mono.wav'

    # (W, M, N, B, T) parameter sets shared by the examples
    singleWindow = (['blackmanharris'], [1001], [4096], [], [-90])
    threeWindows = (['hamming', 'hamming', 'hamming'],
                    [3001, 1501, 751],
                    [16384, 8192, 4096],
                    [2756.25, 5512.5],
                    [-90, -90, -90])

    examples = [
        (orchestra, singleWindow, '_optimizedSineModel.wav'),
        (strings, threeWindows, '_optimizedSineModel.wav'),
        (orchestra, threeWindows, '_nonOptimizedSineModel.wav'),
        (strings, singleWindow, '_nonOptimizedSineModel.wav'),
    ]
    for inputFile, (W, M, N, B, T), suffix in examples:
        _writeSineModelExample(inputFile, W, M, N, B, T, suffix)
Ejemplo n.º 36
0
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input.

    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)

    The output keeps the input's directory and extension and inserts
    '_downsampled' before the extension.
    """
    fs, x = wavread(inputFile)

    y = hopSamples(x, M)

    dirname = os.path.dirname(inputFile)
    # splitext() splits at the last dot only; the original split('.') unpack
    # raised ValueError for names with several dots or none (and shadowed
    # the Python 2 builtin `file`).
    stem, ext = os.path.splitext(os.path.basename(inputFile))

    outputFile = os.path.join(dirname, stem + '_downsampled' + ext)
    wavwrite(y, int(fs / M), outputFile)
Ejemplo n.º 37
0
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Analyze a sound with the stochastic model, resynthesize it and save it.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    returns x, fs, stocEnv, y: input sound, sampling rate, the analysis
        result, and the synthesized sound
    """
    fs, x = UF.wavread(inputFile)

    # analysis followed by resynthesis
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    # output keeps the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')
    return x, fs, stocEnv, y
Ejemplo n.º 38
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Perform analysis/synthesis using the sinusoidal model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x, fs, tfreq, y: input sound, sampling rate, sinusoidal
        frequencies from the analysis, and the synthesized sound
    """
    Ns, H = 512, 128  # synthesis fft size and hop size (H has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis of the input, then synthesis from the result
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output keeps the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')
    return x,fs,tfreq,y
Ejemplo n.º 39
0
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analysis and synthesis using the harmonic model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x, fs, hfreq, y: input sound, sampling rate, harmonic
        frequencies from the analysis, and the synthesized sound
    """
    synthFftSize = 512  # size of fft used in synthesis
    hop = 128           # hop size (has to be 1/4 of the synthesis fft size)

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # detect the harmonics, then rebuild the sound from them
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, analysisWindow, N, hop, t, nH, minf0, maxf0, f0et,
        harmDevSlope, minSineDur)
    y = SM.sineModelSynth(hfreq, hmag, hphase, synthFftSize, hop, fs)

    outputFile = 'output_sounds/%s_harmonicModel.wav' % os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, outputFile)
    return x,fs,hfreq,y
Ejemplo n.º 40
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
	minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
	"""
	Run a sinusoidal plus residual analysis/synthesis and save the three resulting sounds.

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	returns x: input sound; fs: value read together with x from the wav file;
	        mXr: magnitude spectrogram of the residual; tfreq: sinusoidal track
	        frequencies; y: sum of sinusoids and residual
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# split the sound into sinusoidal tracks and a residual signal
	tfreq, tmag, tphase, xr = SPR.sprModelAnal(
		x, fs, win, N, hop, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

	# spectrogram of the residual; only the magnitude part is returned to the caller
	mXr, _ = STFT.stftAnal(xr, fs, win, N, hop)

	# resynthesize: y is the full sound, ys the sinusoidal part only
	y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, synth_fft, hop, fs)

	# write the sinusoidal part, the residual and their sum, in that order
	prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
	outputs = (
		(ys, '_sprModel_sines.wav'),
		(xr, '_sprModel_residual.wav'),
		(y, '_sprModel.wav'),
	)
	for signal, suffix in outputs:
		UF.wavwrite(signal, fs, prefix + suffix)

	return x, fs, mXr, tfreq, y
Ejemplo n.º 41
0
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	"""
	Run a harmonic plus residual analysis/synthesis and save the three resulting sounds.

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0, maxf0: minimum and maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	returns x: input sound; fs: value read together with x from the wav file;
	        mXr: magnitude spectrogram of the residual; hfreq: harmonic
	        frequencies; y: sum of harmonics and residual
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# split the sound into harmonic tracks and a residual signal
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(
		x, fs, win, N, hop, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

	# spectrogram of the residual; only the magnitude part is returned to the caller
	mXr, _ = STFT.stftAnal(xr, fs, win, N, hop)

	# resynthesize: y is the full sound, yh the harmonic part only
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, synth_fft, hop, fs)

	# write the harmonic part, the residual and their sum, in that order
	prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
	outputs = (
		(yh, '_hprModel_sines.wav'),
		(xr, '_hprModel_residual.wav'),
		(y, '_hprModel.wav'),
	)
	for signal, suffix in outputs:
		UF.wavwrite(signal, fs, prefix + suffix)

	return x, fs, mXr, hfreq, y
Ejemplo n.º 42
0
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
	"""
	Run a harmonic plus stochastic analysis/synthesis and save the three resulting sounds.

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0, maxf0: minimum and maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	returns x: input sound; fs: value read together with x from the wav file;
	        hfreq: harmonic frequencies; stocEnv: stochastic envelope from the
	        analysis; y: sum of harmonic and stochastic components
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# harmonic plus stochastic model of the whole sound
	hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(
		x, fs, win, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, synth_fft, stocf)

	# resynthesize: y is the full sound, yh the harmonic part, yst the stochastic part
	y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, synth_fft, hop, fs)

	# write the harmonic part, the stochastic part and their sum, in that order
	prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
	outputs = (
		(yh, '_hpsModel_sines.wav'),
		(yst, '_hpsModel_stochastic.wav'),
		(y, '_hpsModel.wav'),
	)
	for signal, suffix in outputs:
		UF.wavwrite(signal, fs, prefix + suffix)

	return x, fs, hfreq, stocEnv, y
Ejemplo n.º 43
0
# Script fragment: harmonic plus stochastic analysis, time scaling and resynthesis,
# followed by the start of a 4-row figure.
# NOTE(review): inputFile, window, M, N, t, minSineDur, nH, minf0 and maxf0 are used
# below but not assigned in this fragment -- they must be defined before this point.
f0et=5
harmDevSlope=0.01
stocf=0.1

# synthesis fft size and hop size
Ns = 512
H = 128

# read the input sound and build the analysis window
(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)

# harmonic plus stochastic analysis of the whole sound
hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)

# time scaling map passed to hpsTimeScale
# presumably a flat list of time pairs -- confirm against HPST.hpsTimeScale
timeScaling = np.array([0, 0, 2.138, 2.138-1.5, 3.146, 3.146])
yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreq, hmag, mYst, timeScaling)

# resynthesize from the time-scaled representation; an empty array is passed in the phase slot
y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

# write the transformed sound to the current directory
UF.wavwrite(y,fs, 'hps-transformation.wav')


plt.figure(figsize=(12, 9))

# upper frequency limit used for the plots
maxplotfreq = 14900.0

# plot the input sound
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x)
plt.axis([0, x.size/float(fs), min(x), max(x)])
# NOTE(review): the title string is missing its closing parenthesis
plt.title('x (sax-phrase-short.wav')

# plot spectrogram of the stochastic component
plt.subplot(4,1,2)
numFrames = int(mYst[:,0].size)
Ejemplo n.º 44
0
def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
	"""
	Analyze a sound with the harmonic plus stochastic model
	The sound is also resynthesized without the original phases, written to
	'output_sounds/', and plotted together with the analysis result.
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	returns inputFile: input file name; fs: sampling rate of input file,
	        hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# compute the harmonic plus stochastic model of the whole sound
	hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)

	# synthesize the harmonic plus stochastic model without original phases
	y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

	# write output sound
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav'
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# plot spectrogram stochastic component
	plt.subplot(3,1,2)
	numFrames = int(mYst[:,0].size)
	sizeEnv = int(mYst[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv
	# maxplotfreq is a float, so the bin count must be truncated to an int
	# before it can be used as a slice bound
	maxplotbin = int(sizeEnv*maxplotfreq/(.5*fs))
	plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:,:maxplotbin+1]))
	plt.autoscale(tight=True)

	# plot harmonic on top of stochastic spectrogram
	if (hfreq.shape[1] > 0):
		# zero out everything at or above maxplotfreq, then hide all zeros from the plot
		harms = hfreq*np.less(hfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs)
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)

	return inputFile, fs, hfreq, hmag, mYst
Ejemplo n.º 45
0
def exploreSineModelMultiRes(inputFile='../../sounds/orchestra.wav'):
    """
    Grid-search analysis parameters for the multi-resolution sine model.

    For every combination of window size, window type, threshold, analysis
    fft size and number of additional resolutions, the parameter arrays
    W, M, N, B, T are built and handed to best.calculateAndUpdate. The best
    configuration is printed and the sound it yields is written next to the
    input file with the suffix '_optimizedSineModel.wav'.

    inputFile (string) = wav file including the path
    """
    fs, x = UF.wavread(inputFile)               # read input sound

    # First, let's check whether the new code returns same result as old one for mono-resolution case
    verifySineModelMultiRes()

    # Let's find optimal parameters in a reasonable range
    windows = ['hanning', 'hamming', 'blackman', 'blackmanharris']

    # NOTE(review): Best is defined outside this block; it is assumed to keep
    # the lowest-diff configuration seen so far -- confirm against its definition.
    best = Best()

    log_f = np.log(fs/2.0)
    log_step = np.log(2)

    for k in range(5, 80, 5):
        m = k * 100 + 1                                  # Window size in samples
        log_m = np.log(float(m))
        for window in windows:                           # Window type
            # NOTE(review): this range yields the single value -90
            for t in range(-90, -100, -10):              # Threshold
                for Ns in [512]:                         # size of fft used in synthesis
                    # smallest power of two that is >= m: size of fft used in analysis
                    n = 2
                    while n < m:
                        n = n * 2
                    # try out the fft size closest to the window size, and some larger ones
                    for nPower in range(0, 3):
                        log_n = np.log(float(n))
                        # try out multi-resolution analysis windows
                        for nAdditionalResolutions in range(0, 4):
                            W = np.array([window])
                            M = np.array([m])
                            N = np.array([n])
                            B = np.array([])
                            T = np.array([t])

                            executeStep = True
                            for additionalResolution in range(0, nAdditionalResolutions):
                                # each extra resolution halves the window and fft size once more
                                scaledM = int(np.exp(log_m - log_step*(additionalResolution+1)))
                                if scaledM % 2 == 0:
                                    scaledM = scaledM + 1        # keep the window size odd
                                scaledN = int(np.exp(log_n - log_step*(additionalResolution+1)))
                                if scaledN < scaledM:
                                    scaledN = scaledM
                                # round the fft size up to a power of two
                                appropriateScaledN = 2
                                while appropriateScaledN < scaledN:
                                    appropriateScaledN = appropriateScaledN * 2
                                # boundary = (fs/2) / 2**(nAdditionalResolutions - additionalResolution)
                                frequencyBoundary = np.exp(log_f - (log_step*(nAdditionalResolutions - additionalResolution)))
                                if scaledM < Ns:
                                    # window got smaller than the synthesis fft: stop adding
                                    # resolutions; skip the step if not even one could be added
                                    if additionalResolution == 0:
                                        executeStep = False
                                    break
                                W = np.append(W, window)
                                M = np.append(M, scaledM)
                                N = np.append(N, appropriateScaledN)
                                B = np.append(B, frequencyBoundary)
                                T = np.append(T, t)
                            if executeStep:
                                best.calculateAndUpdate(x, fs, Ns, W, M, N, B, T)
                        n = n * 2

    # print() calls instead of Python 2 print statements
    print('FILE:', inputFile)
    print('BEST:', 'diff =', best.diff, 'for W =', best.W, ', M =', best.M, ', N =', best.N,
          ', B =', best.B, ', T =', best.T, ', Ns =', best.Ns)

    # re-run the best configuration to obtain the sound to write
    # NOTE(review): relies on calculateAndUpdate returning the synthesized signal -- confirm
    y_best = best.calculateAndUpdate(x, fs, best.Ns, best.W, best.M, best.N, best.B, best.T)
    outputFile = inputFile[:-4] + '_optimizedSineModel.wav'
    UF.wavwrite(y_best, fs, outputFile)
Ejemplo n.º 46
0
# Script fragment: detect the harmonics, synthesize the fundamental alone and all
# harmonics, and draw the fundamental on top of the spectrogram.
# NOTE(review): x, fs, w, N, H and mX are used below but not assigned in this
# fragment -- they must be defined before this point.

# compute the F0 and the harmonics
t = -97
minf0 = 310
maxf0 = 450
f0et = 4
nH = 70
harmDevSlope = 0.01
Ns = H * 4
minSineDur = 0.3
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

# keep only the first track (the fundamental) by zeroing the frequencies of all others
hfreqt = copy.copy(hfreq)
hfreqt[:, 1:] = 0
yf0 = 4 * SM.sineModelSynth(hfreqt, hmag, hphase, Ns, H, fs)
yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
UF.wavwrite(yf0, fs, "cello-phrase-f0.wav")
UF.wavwrite(yh, fs, "cello-phrase-harmonics.wav")

# plot the F0 on top of the spectrogram
plt.figure(3, figsize=(16, 4.5))
maxplotfreq = 5000.0
harms = hfreq * np.less(hfreq, maxplotfreq)
# blank out whole frames in which the fundamental is zero
harms[harms[:, 0] == 0] = np.nan
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
# maxplotfreq is a float, so truncate the bin count before using it as a slice bound
maxplotbin = int(N * maxplotfreq / fs)
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, : maxplotbin + 1]))
plt.plot(frmTime, harms[:, 0], linewidth=3, color="0")
plt.xlabel("time (sec)")
plt.ylabel("frequency (Hz)")
plt.title("spectrogram + fundamental frequency")
Ejemplo n.º 47
0
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	"""
	Analyze and resynthesize a sound with the harmonic plus residual model,
	write the harmonic part, the residual and their sum to 'output_sounds/',
	and show a three-row figure (input, residual spectrogram with harmonics, output).

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0, maxf0: minimum and maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# split the sound into harmonic tracks and a residual signal
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(
		x, fs, win, N, hop, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

	# spectrogram of the residual; only the magnitude part is plotted
	mXr, _ = STFT.stftAnal(xr, win, N, hop)

	# resynthesize: y is the full sound, yh the harmonic part only
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, synth_fft, hop, fs)

	# write the harmonic part, the residual and their sum, in that order
	prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
	outputs = (
		(yh, '_hprModel_sines.wav'),
		(xr, '_hprModel_residual.wav'),
		(y, '_hprModel.wav'),
	)
	for signal, suffix in outputs:
		UF.wavwrite(signal, fs, prefix + suffix)

	plt.figure(figsize=(12, 9))

	# upper limit of the plotted frequency range
	maxplotfreq = 5000.0
	rate = float(fs)

	# row 1: input waveform
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/rate, x)
	plt.axis([0, x.size/rate, min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# row 2: magnitude spectrogram of the residual up to maxplotfreq
	plt.subplot(3,1,2)
	top_bin = int(N*maxplotfreq/fs)
	frame_times = hop*np.arange(int(mXr[:,0].size))/rate
	bin_freqs = np.arange(top_bin+1)*rate/N
	plt.pcolormesh(frame_times, bin_freqs, np.transpose(mXr[:,:top_bin+1]))
	plt.autoscale(tight=True)

	# harmonic tracks on top of the residual spectrogram
	if hfreq.shape[1] > 0:
		# zero out values at or above maxplotfreq, then hide every zero from the plot
		harms = hfreq*np.less(hfreq, maxplotfreq)
		harms[harms==0] = np.nan
		harm_times = hop*np.arange(int(harms[:,0].size))/rate
		plt.plot(harm_times, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time(s)')
		plt.ylabel('frequency(Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + residual spectrogram')

	# row 3: output waveform
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/rate, y)
	plt.axis([0, y.size/rate, min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.ion()
	plt.show()
Ejemplo n.º 48
0
# Script fragment: remaining rows of a 4-row figure for the sine-model time scaling example.
# NOTE(review): maxplotbin, frmTime, mX, mY, ytfreq, y, fs, N, H and maxplotfreq are used
# below but not assigned at the top of this fragment -- they must be defined before this point.

# spectrogram mX up to maxplotbin (the subplot it is drawn into is selected before this fragment)
binFreq = np.arange(maxplotbin+1)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:maxplotbin+1]))
plt.autoscale(tight=True)

# row 3: time-scaled sine tracks, limited to frequencies below maxplotfreq
plt.subplot(4,1,3)
numFrames = int(ytfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
tracks = ytfreq*np.less(ytfreq, maxplotfreq)
# values <= 0 (including the ones zeroed by the mask above) are not drawn
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
plt.autoscale(tight=True)
plt.title('mY + time-scaled sine frequencies')

# spectrogram mY drawn into the same subplot as the tracks
maxplotbin = int(N*maxplotfreq/fs)
numFrames = int(mY[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(maxplotbin+1)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:,:maxplotbin+1]))
plt.autoscale(tight=True)

# row 4: output waveform
plt.subplot(4,1,4)
plt.plot(np.arange(y.size)/float(fs), y, 'b')
plt.axis([0,y.size/float(fs),min(y),max(y)])
plt.title('y')

# write the sound and the figure to the current directory, then display the figure
plt.tight_layout()
UF.wavwrite(y, fs, 'mridangam-sineModelTimeScale.wav')
plt.savefig('sineModelTimeScale-mridangam.png')
plt.show()

Ejemplo n.º 49
0
# Script fragment: three stacked spectrograms (mX, mX2, mY) for the stft morph example.
# NOTE(review): mX, mX2, mY, y, fs, H1, N1 and maxplotfreq are used below but not
# assigned in this fragment -- they must be defined before this point.

# row 1: spectrogram mX up to maxplotfreq
plt.subplot(311)
numFrames = int(mX[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)
binFreq = fs*np.arange(N1*maxplotfreq/fs)/N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:int(N1*maxplotfreq/fs+1)]))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

# row 2: spectrogram mX2
# NOTE(review): the time axis uses H1 here as well -- confirm mX2 was computed with that hop size
plt.subplot(312)
numFrames = int(mX2[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)

# fft size taken as twice the number of columns of mX2
N = 2*mX2[0,:].size
binFreq = fs*np.arange(N*maxplotfreq/fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX2[:,:int(N*maxplotfreq/fs+1)]))
plt.title('mX2 (speech-male.wav)')
plt.autoscale(tight=True)

# row 3: spectrogram mY
plt.subplot(313)
numFrames = int(mY[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)
binFreq = fs*np.arange(N1*maxplotfreq/fs)/N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:,:int(N1*maxplotfreq/fs+1)]))
plt.title('mY')
plt.autoscale(tight=True)

# write the sound and the figure to the current directory, then display the figure
plt.tight_layout()
UF.wavwrite(y, fs, 'orchestra-speech-stftMorph.wav')
plt.savefig('stftMorph-orchestra.png')
plt.show()
Ejemplo n.º 50
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
					maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
	"""
	Analyze and resynthesize a sound with the sinusoidal model, write the result
	to 'output_sounds/' and show a three-row figure (input, sine tracks, output).

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	maxnSines: maximum number of parallel sinusoids
	freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
	freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# sinusoidal analysis followed by resynthesis from the tracks
	tfreq, tmag, tphase = SM.sineModelAnal(
		x, fs, win, N, hop, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	y = SM.sineModelSynth(tfreq, tmag, tphase, synth_fft, hop, fs)

	# the output name is the input base name minus its last 4 characters plus a model suffix
	stem = os.path.basename(inputFile)[:-4]
	UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')

	plt.figure(figsize=(12, 9))

	# upper limit of the plotted frequency range
	maxplotfreq = 5000.0
	rate = float(fs)
	in_duration = x.size/rate

	# row 1: input waveform
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/rate, x)
	plt.axis([0, in_duration, min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# row 2: frequencies of the sinusoidal tracks
	plt.subplot(3,1,2)
	if tfreq.shape[1] > 0:
		frame_times = hop*np.arange(tfreq.shape[0])/rate
		# values <= 0 are replaced in place so that they are not drawn
		tfreq[tfreq<=0] = np.nan
		plt.plot(frame_times, tfreq)
		plt.axis([0, in_duration, 0, maxplotfreq])
		plt.title('frequencies of sinusoidal tracks')

	# row 3: output waveform
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/rate, y)
	plt.axis([0, y.size/rate, min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)
Ejemplo n.º 51
0
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Function to estimate fundamental frequency (f0) in an audio signal. This function also plots the
    f0 contour on the spectrogram and synthesize the f0 contour.

    The contour is zeroed before 0.5 s and from 4.0 s on, synthesized as a sine
    wave and written to 'synthF0Contour.wav'.

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values
    """

    ### Change these analysis parameter values
    window = "blackman"
    M = 4401
    N = 8192
    f0et = 7
    t = -90.0
    minf0 = 140
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                                     #fix hop size

    fs, x = UF.wavread(inputFile)                               #reading inputFile
    w  = get_window(window, M)                                  #obtaining analysis window

    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  #estimating F0

    # frame indices of the 0.5 s and 4.0 s marks; np.floor/np.ceil return floats,
    # which cannot be used as slice bounds, so convert them to int
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    # frequency range to plot
    maxplotfreq = 500.0
    fontSize = 16
    plot = 1            # plot = 1 shows the figure on screen, otherwise saves it to a file.

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, fs, w, N, H)                      #using same params as used for analysis
    mX = np.transpose(mX[:,:int(N*(maxplotfreq/fs))+1])         #keep bins up to maxplotfreq

    timeStamps = np.arange(mX.shape[1])*H/float(fs)
    binFreqs = np.arange(mX.shape[0])*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # vertical markers at the 0.5 s and 4.0 s limits of the kept contour
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    # force a 2:1 width-to-height ratio of the axes box
    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))

    if plot == 1:
        # interactive display
        plt.autoscale(tight=True)
        plt.show()
    else:
        # write the figure to disk instead of showing it
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
	minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
	"""
	Analyze a sound with the harmonic model, resynthesize it without the original
	phases, write the result to 'output_sounds/' and show a three-row figure
	(input, harmonic tracks, output).

	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	returns inputFile: input file name; fs: sampling rate of input file;
	        hfreq, hmag: harmonic frequencies and magnitudes
	"""

	# synthesis settings: the hop size has to be 1/4 of the synthesis fft size
	hop = 128
	synth_fft = 512

	# load the sound and build the analysis window
	fs, x = UF.wavread(inputFile)
	win = get_window(window, M)

	# harmonic model of the whole sound
	hfreq, hmag, hphase = HM.harmonicModelAnal(
		x, fs, win, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

	# synthesis with an empty phase array instead of the analyzed phases
	y = SM.sineModelSynth(hfreq, hmag, np.array([]), synth_fft, hop, fs)

	# the output name is the input base name minus its last 4 characters plus a model suffix
	stem = os.path.basename(inputFile)[:-4]
	UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModel.wav')

	plt.figure(figsize=(12, 9))

	# upper limit of the plotted frequency range
	maxplotfreq = 5000.0
	rate = float(fs)
	in_duration = x.size/rate

	# row 1: input waveform
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/rate, x)
	plt.axis([0, in_duration, min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# row 2: harmonic tracks, drawn from a copy so that the returned hfreq stays untouched
	if hfreq.shape[1] > 0:
		plt.subplot(3,1,2)
		tracks = np.copy(hfreq)
		frame_times = hop*np.arange(tracks.shape[0])/rate
		tracks[tracks<=0] = np.nan
		plt.plot(frame_times, tracks)
		plt.axis([0, in_duration, 0, maxplotfreq])
		plt.title('frequencies of harmonic tracks')

	# row 3: output waveform
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/rate, y)
	plt.axis([0, y.size/rate, min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)

	return inputFile, fs, hfreq, hmag
Ejemplo n.º 53
0
# Script fragment: rows 2-4 of a 4-row figure for the stft example (the first row is
# started before this fragment).
# NOTE(review): x, y, mX, pX, fs, N and H are used below but not assigned in this
# fragment -- they must be defined before this point.

# axis limits of the plot started before this fragment: full duration and amplitude range of x
plt.axis([0,x.size/float(fs),min(x),max(x)])

# row 2: full magnitude spectrogram
plt.subplot(412)
numFrames = int(mX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(mX[0,:].size)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

# row 3: difference of the phase spectrogram between neighbouring frequency bins
# (np.diff along axis 0 of the transposed array, which has one row fewer than binFreq has entries)
plt.subplot(413)
numFrames = int(pX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(pX[0,:].size)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.diff(np.transpose(pX),axis=0))
plt.title('pX derivative, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

# row 4: output waveform
plt.subplot(414)
plt.plot(np.arange(y.size)/float(fs), y,'b')
plt.axis([0,y.size/float(fs),min(y),max(y)])
plt.title('y')

# write the figure and the sound to the current directory, then display the figure
plt.tight_layout()
plt.savefig('stft-system.png')
UF.wavwrite(y, fs, 'piano-stft.wav')
plt.show()
  
  
  
Ejemplo n.º 54
0
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
	freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1,
	timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146])):
	"""
	transform the analysis values returned by the analysis function and synthesize the sound
	The result is written to 'output_sounds/' and plotted (transformed stochastic
	spectrogram with harmonics on top, and the output waveform).
	inputFile: name of input file
	fs: sampling rate of input file
	hfreq, hmag: harmonic frequencies and magnitudes
	mYst: stochastic residual
	freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
	freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
	timbrePreservation: 1 preserves original timbre, 0 it does not
	timeScaling: time scaling factors, in time-value pairs
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# frequency scaling of the harmonics
	hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

	# time scaling the sound
	yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling)

	# synthesis from the transformed hps representation (empty array in the phase slot)
	y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

	# write output sound
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav'
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 6))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot spectrogram of transformed stochastic component
	plt.subplot(2,1,1)
	numFrames = int(ystocEnv[:,0].size)
	sizeEnv = int(ystocEnv[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv
	# maxplotfreq is a float, so the bin count must be truncated to an int
	# before it can be used as a slice bound
	maxplotbin = int(sizeEnv*maxplotfreq/(.5*fs))
	plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:maxplotbin+1]))
	plt.autoscale(tight=True)

	# plot transformed harmonic on top of stochastic spectrogram
	if (yhfreq.shape[1] > 0):
		# zero out everything at or above maxplotfreq, then hide all zeros from the plot
		harms = yhfreq*np.less(yhfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs)
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(2,1,2)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show()
Ejemplo n.º 55
0
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Analyze a sound with the sinusoidal model, resynthesize it, write the
    result to a wav file in the current directory and plot input, tracks
    and output.

    Input:
            inputFile (string) = wav file including the path
    Output:
            True, after the figure has been shown
    """
    # analysis settings
    window = 'hamming'        # window type
    M = 3001                  # window size in samples
    N = 4096                  # FFT size
    t = -80                   # threshold
    minSineDur = 0.02         # minimum duration of a sinusoid
    maxnSines = 15            # maximum number of sinusoids at any time frame
    freqDevOffset = 10        # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001      # slope increase of minimum frequency deviation

    # synthesis settings
    Ns = 512                  # size of fft used in synthesis
    H = 128                   # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis followed by resynthesis from the tracks
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # the output name drops the last four characters (e.g. ".wav") of the input name
    stem = os.path.basename(inputFile)[:-4]
    outputFile = stem + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    def plotWaveform(row, samples, caption):
        # draw one time-domain signal in the given row of the 3x1 grid
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size)/float(fs), samples)
        plt.axis([0, samples.size/float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(caption)

    plt.figure(figsize=(12, 9))

    # upper frequency limit of the track plot
    maxplotfreq = 5000.0

    # top panel: input sound
    plotWaveform(1, x, 'input sound: x')

    # middle panel: sinusoidal tracks (left empty when there are no tracks)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frameCount = tfreq.shape[0]
        frameTimes = H*np.arange(frameCount)/float(fs)
        # non-positive frequencies are replaced by NaN so they are not drawn
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frameTimes, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom panel: resynthesized sound
    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
    return True
Ejemplo n.º 56
0
def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512):
    """
    Analysis/synthesis using the STFT.

    Reads the input sound, computes its magnitude and phase spectrograms,
    resynthesizes the sound from them, writes the result under
    'output_sounds/' and shows a four-panel figure (input, magnitude
    spectrogram, phase-derivative spectrogram, output).

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = STFT.stftAnal(x, w, N, H)

    # perform the inverse stft
    y = STFT.stftSynth(mX, pX, M, H)

    # output file name: input base name minus its last four characters (e.g. ".wav")
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stft.wav'

    # write the sound resulting from the inverse stft
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # Number of spectrum bins to display. N*maxplotfreq/fs is a float, and a
    # float cannot be used as a slice bound, so it is truncated explicitly.
    maxBin = int(N*maxplotfreq/fs + 1)

    # plot the input sound
    plt.subplot(4,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram; both axes are derived from the array that
    # is actually drawn so their lengths always agree with the data
    plt.subplot(4,1,2)
    mXplot = mX[:, :maxBin]
    frmTime = H*np.arange(mXplot.shape[0])/float(fs)
    binFreq = fs*np.arange(mXplot.shape[1])/float(N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXplot))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram as its derivative along the frequency axis;
    # np.diff returns one column fewer than it is given, so the frequency
    # axis is rebuilt from the differentiated array
    plt.subplot(4,1,3)
    pXdiff = np.diff(pX[:, :maxBin], axis=1)
    frmTime = H*np.arange(pXdiff.shape[0])/float(fs)
    binFreq = fs*np.arange(pXdiff.shape[1])/float(N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(pXdiff))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4,1,4)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
Ejemplo n.º 57
0
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Run the sine-model analysis of a sound, resynthesize it without the
    original phases, write the result under 'output_sounds/' and plot
    input, sinusoidal tracks and output in a non-blocking figure.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    # synthesis settings: fft size and hop size (has to be 1/4 of Ns)
    Ns = 512
    H = 128

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound; the analyzed phases are not used afterwards
    tfreq, tmag, _ = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # resynthesis receives an empty array in place of the phases
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # the output name drops the last four characters (e.g. ".wav") of the input name
    stem = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + stem + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    def plotWaveform(row, samples, caption):
        # draw one time-domain signal in the given row of the 3x1 grid
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size)/float(fs), samples)
        plt.axis([0, samples.size/float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(caption)

    plt.figure(figsize=(12, 9))

    # upper frequency limit of the track plot
    maxplotfreq = 5000.0

    # top panel: input sound
    plotWaveform(1, x, 'input sound: x')

    # middle panel: sinusoidal tracks, created only when tracks exist
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # work on a copy so the returned tfreq is left untouched
        tracks = np.copy(tfreq)
        # zero every value that is not below the plot limit, then blank all
        # non-positive values with NaN so they are not drawn
        belowLimit = np.less(tracks, maxplotfreq)
        tracks = tracks*belowLimit
        tracks[tracks <= 0] = np.nan
        frameCount = int(tracks.shape[0])
        frameTimes = H*np.arange(frameCount)/float(fs)
        plt.plot(frameTimes, tracks)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom panel: resynthesized sound
    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag