def onset_test(noise_ffts, noise_phs, ind, mY, pY, M, H, recType, outDir, name, fs):
    """
    Replace the frames around each given index with noise frames and
    synthesize the sound with the inverse short-time Fourier transform
    noise_ffts, noise_phs: magnitude and phase spectra supplying the replacement frames (rows 97 to 102)
    ind: frame indexes; rows i-3 to i+2 of mY and pY are overwritten for every i in ind
    mY: magnitude spectra, pY: phase spectra (both are modified in place)
    M: window size, H: hop-size
    recType, outDir: sub-folders of the hard-coded Overtone_Arrays directory,
                     which becomes the working directory of the process
    name: output file name without the '.wav' extension
    fs: sampling rate handed to the wav writer
    returns y: output sound
    """
    noiseRows = slice(97, 103)
    for onset in ind:
        target = slice(onset - 3, onset + 3)
        mY[target, :] = noise_ffts[noiseRows, :]
        pY[target, :] = noise_phs[noiseRows, :]

    headLen = (M + 1) // 2                                  # half window, rounded up
    tailLen = M // 2                                        # half window, rounded down
    frameCount = mY[:, 0].size
    y = np.zeros(frameCount * H + headLen + tailLen)
    for frame in range(frameCount):
        frameSound = DFT.dftSynth(mY[frame, :], pY[frame, :], M)
        begin = frame * H                                   # frames are H samples apart
        y[begin:begin + headLen + tailLen] += H * frameSound   # overlap-add

    # drop the half windows at both ends (per the original comments they were added in stftAnal)
    y = np.delete(y, range(tailLen))
    y = np.delete(y, range(y.size - headLen, y.size))

    # NOTE: changes the working directory of the whole process
    os.chdir('/home/tgoodall/sms-tools/software/models/Overtone_Arrays/' + recType + '/' + outDir)
    UF.wavwrite(y, fs, name + '.wav')
    return y
def downsampleAudio(inputFile, M):
    """
    Read a wav file, downsample it by a factor of M and write the result as
    <input base name>_downsampled.wav in the current working directory.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    print("Reading WAV...")
    (sampleRate, samples) = wavread(inputFile)
    print("Sample Rate:",sampleRate,"kHz/16")
    print("Sample Size:",len(samples))
    base = os.path.basename(inputFile)
    outputFile = "%s_downsampled.wav" % base.replace(".wav", "")
    print("Downsampling...")
    downsampled = hopSamples(samples, M)
    print("Writing downsampled WAV...")
    # The previous code computed a reduced rate but then wrote the file with
    # the input rate. Integer division keeps the rate an int for the header.
    wavwrite(downsampled, sampleRate // M, outputFile)
    print("Done:", outputFile)
def computeModel(inputFile, B, M, window = 'hanning', t = -90):
    """
    Run SMMR.sineModelMultiRes on a sound, write the input and the result
    into 'output_sounds/' and plot both signals in one figure
    inputFile: input sound file
    B: one entry per band; its length sets how many windows are built and
       its values set the fft sizes (next power of two of each entry)
    M: analysis window sizes, indexed by band
    window: analysis window type passed to get_window
    t: value forwarded to SMMR.sineModelMultiRes (presumably the peak threshold - confirm)
    """
    numBands = len(B)
    fs, x = UF.wavread(inputFile)
    w = [get_window(window, M[band]) for band in range(numBands)]
    # NOTE(review): the fft sizes come from B rather than from the window sizes M - confirm intended
    exponents = np.ceil(np.log2(B))
    N = (2**exponents).astype(int)
    y_combined = SMMR.sineModelMultiRes(x, fs, w, N, t, B)

    # the input copy keeps its full base name, the model output swaps the last four characters
    baseName = os.path.basename(inputFile)
    UF.wavwrite(x, fs, 'output_sounds/' + baseName)
    UF.wavwrite(y_combined, fs, 'output_sounds/' + baseName[:-4] + '_sineModelMultiRes.wav')

    plt.figure()
    for signal in (x, y_combined):
        plt.plot(signal)
    plt.show()
def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512):
    """
    STFT analysis followed by synthesis; the result is saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)
    returns x: input sound, fs: sampling rate, mX: magnitude spectrogram,
            pX: phase spectrogram, y: output sound
    """
    fs, x = UF.wavread(inputFile)

    # analysis: magnitude and phase spectrograms
    mX, pX = STFT.stftAnal(x, fs, get_window(window, M), N, H)

    # synthesis: inverse stft
    y = STFT.stftSynth(mX, pX, M, H)

    # the last four characters of the base name (the extension) are replaced
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stft.wav')
    return x, fs, mX, pX, y
def getJawaab(ipFile = '../dataset/testInputs/testInput_1.wav', ipulsePos = getPulsePosFromAnn('../dataset/testInputs/testInput_1.csv'), strokeModels = None, oFile = './tablaOutput.wav', randomFlag = 1):
    """
    Generate a stroke composition in response to an input recording and
    optionally write it as audio.
    Inputs:
        ipFile: input wav file
        ipulsePos: pulse positions of the input (the default is read from
                   the annotation file once, when the function is defined)
        strokeModels: trained stroke models; nothing is generated when None
        oFile: output wav file name, or None to skip writing audio
        randomFlag: 1 generates a random composition, any other value one
                    that is similar to the input
    Returns:
        opulsePos, strokeSeq, tStamps, oFile (all None when strokeModels is None)
    """
    if strokeModels is None:
        print("Train models first before calling getJawaab() ...")
        # The previous code assigned 'ts' here and then failed on the unbound
        # name 'tStamps' at the return statement.
        return None, None, None, None

    print("Getting jawaab...")
    pulsePeriod = np.median(np.diff(ipulsePos))
    print(pulsePeriod)
    audioIn = UF.wavread(ipFile)[1]
    pieceDur = len(audioIn)/params.Fs
    if randomFlag == 1:
        strokeSeq, tStamps, opulsePos = genRandomComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels)
    else:
        invCmat = getInvCovarianceMatrix(strokeModels)
        strokeSeq, tStamps, opulsePos = genSimilarComposition(pulsePeriod, pieceDur = pieceDur, strokeModels = strokeModels, iAudioFile = ipFile, iPos = ipulsePos, invC = invCmat)
    print(strokeSeq)
    print(tStamps)
    print(opulsePos)
    if oFile is not None:
        audio = genAudioFromStrokeSeq(strokeModels, strokeSeq, tStamps)
        # scale by the maximum; the 0.01 offset keeps the divisor away from zero
        audio = audio/(np.max(audio) + 0.01)
        UF.wavwrite(audio, params.Fs, oFile)
    return opulsePos, strokeSeq, tStamps, oFile
def transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling = np.array([0, 2.0, 1, .3]), freqStretching = np.array([0, 1, 1, 1.5]), timbrePreservation = 1, timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Transform the harmonic analysis values, synthesize the sound, save it in
    'output_sounds/' and plot the transformed tracks and the output
    inputFile: name of input file (only used to build the output name)
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    freqStretching: frequency stretching factors, in time-value pairs
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    # frequency transformation first, then time scaling, then synthesis
    scaledFreq, scaledMag = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)
    yhfreq, yhmag = ST.sineTimeScaling(scaledFreq, scaledMag, timeScaling)
    y = SM.sineModelSynth(yhfreq, yhmag, np.array([]), Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModelTransformation.wav')

    rate = float(fs)
    plt.figure(figsize=(12, 6))

    # upper panel: harmonic tracks below the plotting limit, gaps shown as nan
    maxplotfreq = 15000.0
    plt.subplot(2,1,1)
    tracks = yhfreq*(yhfreq < maxplotfreq)
    tracks[tracks<=0] = np.nan
    numFrames = int(tracks[:,0].size)
    plt.plot(H*np.arange(numFrames)/rate, tracks, color='k')
    plt.title('transformed harmonic tracks')
    plt.autoscale(tight=True)

    # lower panel: output waveform
    plt.subplot(2,1,2)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling = np.array([0, 2.0, 1, .3]), timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Apply frequency and time scaling to sinusoidal tracks, synthesize the
    sound, save it in 'output_sounds/' and plot tracks and output
    inputFile: name of input file (only used to build the output name)
    fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    scaledFreq = ST.sineFreqScaling(tfreq, freqScaling)
    ytfreq, ytmag = ST.sineTimeScaling(scaledFreq, tmag, timeScaling)
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModelTransformation.wav')

    rate = float(fs)
    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0

    # the track panel is only drawn when there is at least one track
    hasTracks = ytfreq.shape[1] > 0
    if hasTracks:
        plt.subplot(2,1,1)
        tracks = np.copy(ytfreq)
        tracks = tracks*(tracks < maxplotfreq)
        tracks[tracks<=0] = np.nan
        numFrames = int(tracks[:,0].size)
        plt.plot(H*np.arange(numFrames)/rate, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # output waveform
    plt.subplot(2,1,2)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def main(inputFile='../../sounds/ocean.wav', H=256, stocf=.1):
    """
    Stochastic model analysis/synthesis of a sound; saves the result in
    'output_sounds/' and plots input, stochastic representation and output
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size
    stocf: decimation factor used for the stochastic approximation
    """
    (fs, x) = UF.wavread(inputFile)
    mYst = STM.stochasticModelAnal(x, H, stocf)
    y = STM.stochasticModelSynth(mYst, H)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    rate = float(fs)

    def plotSound(row, sound):
        # waveform panel shared by the input (row 1) and the output (row 3)
        plt.subplot(3,1,row)
        plt.plot(np.arange(sound.size)/rate, sound)
        plt.axis([0, sound.size/rate, min(sound), max(sound)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')

    plt.figure(figsize=(12, 9))

    plotSound(1, x)
    plt.title('input sound: x')

    # stochastic representation: frames along time, bins along frequency
    plt.subplot(3,1,2)
    numFrames = int(mYst[:,0].size)
    frmTime = H*np.arange(numFrames)/rate
    binFreq = np.arange(stocf*H)*rate/(stocf*2*H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    plotSound(3, y)

    plt.tight_layout()
    plt.show()
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result as
    <inputFile>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (fs, x) = wavread(inputFile)
    # Keep every M-th sample. The previous code never decimated the signal
    # and called wavwrite without any samples.
    y = x[::M]
    # wavwrite is called as (samples, sampling rate, file name); the output
    # rate is the input rate divided by M, kept as an integer for the header.
    wavwrite(y, fs // M, inputFile + '_downsampled.wav')
def extractHarmSpec(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Stochastic model analysis/synthesis of a sound; saves the result in
    'output_sounds/' and plots input, stochastic envelope and output
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    """
    (fs, x) = UF.wavread(inputFile)
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    rate = float(fs)

    def plotSound(row, sound):
        # waveform panel shared by the input (row 1) and the output (row 3)
        plt.subplot(3, 1, row)
        plt.plot(np.arange(sound.size) / rate, sound)
        plt.axis([0, sound.size / rate, min(sound), max(sound)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')

    plt.figure(figsize=(12, 9))

    plotSound(1, x)
    plt.title('input sound: x')

    # stochastic representation: frames along time, bins along frequency
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv[:, 0].size)
    frmTime = H * np.arange(numFrames) / rate
    binFreq = np.arange(stocf * N / 2) * rate / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    plotSound(3, y)

    plt.tight_layout()
    plt.show()
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to 'test.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    # The decimated samples belong to a rate M times lower; the previous code
    # wrote them with the input rate. Integer division keeps the rate an int.
    wavwrite(y, fs // M, 'test.wav')
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input,
    with '_downsampled.wav' replacing the extension.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    decimated = hopSamples(x, M)
    newRate = int(fs / M)       # sampling rate of the output file
    outputFile = os.path.splitext(inputFile)[0] + "_downsampled.wav"
    wavwrite(decimated, newRate, outputFile)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it to the current
    working directory as <input base name>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    # was 'hopAsmples', a typo that raised NameError on every call
    y = hopSamples(x, M)
    # the last four characters of the base name (the extension) are replaced
    outputFile = os.path.basename(inputFile)[0:-4] + '_downsampled.wav'
    # integer division: '/' gives a float rate on Python 3
    wavwrite(y, fs // M, outputFile)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to 'result.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    # Integer division instead of floor(fs / M): floor may hand back a float
    # (numpy.floor always does, math.floor does on Python 2), while the
    # sampling rate written to the wav header has to be an integer.
    wavwrite(y, fs // M, "result.wav")
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result as
    <inputFile>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (fs, data) = wavread(inputFile)
    newData = data[::M]     # keep every M-th sample
    # integer division: '/' gives a float rate on Python 3, and the wav
    # header needs an integer sampling rate
    wavwrite(newData, fs // M, inputFile + "_downsampled.wav")
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it to the current
    working directory as <input base name>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    decimated = hopSamples(x, M)
    newRate = int(fs / M)
    # the last four characters of the base name (the extension) are replaced
    stem = os.path.basename(inputFile)[0:-4]
    wavwrite(decimated, newRate, stem + '_downsampled.wav')
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input
    as <input name>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    new_x = hopSamples(x, M)
    # Strip only a literal '.wav' at the end of the name. The previous
    # pattern '.wav' matched any character followed by 'wav' anywhere in the
    # path, and returned the input name unchanged (overwriting the input)
    # when nothing matched.
    outputFile = re.sub(r'\.wav$', '', inputFile) + '_downsampled.wav'
    wavwrite(new_x, fs // M, outputFile)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input
    as <input name>_downsampled.wav.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    # rstrip('.wav') removes any trailing run of the characters '.', 'w',
    # 'a' and 'v' (e.g. 'viva.wav' became 'vi'), so cut the suffix explicitly
    stem = inputFile[:-4] if inputFile.endswith('.wav') else inputFile
    outputFile = stem + '_downsampled.wav'
    print(outputFile)
    (sr, data) = wavread(inputFile)
    downsampleData = hopSamples(data, M)
    # integer division: '/' gives a float rate on Python 3
    wavwrite(downsampleData, sr // M, outputFile)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input,
    with '_downsampled.wav' replacing the last four characters of the name.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, samples = wavread(inputFile)
    downsampled = hopSamples(samples, M)
    # The decimated samples belong to a rate M times lower; the previous code
    # wrote them with the input rate. Integer division keeps the rate an int.
    wavwrite(downsampled, fs // M, "{}_downsampled.wav".format(inputFile[:-4]))
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input
    with '_downsampled' inserted before the extension.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    root, ext = os.path.splitext(inputFile)
    fs, x = wavread(inputFile)
    # The decimated samples belong to a rate M times lower; the previous code
    # wrote them with the input rate. Integer division keeps the rate an int.
    wavwrite(hopSamples(x, M), fs // M, root + '_downsampled' + ext)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it with '.wav' in the
    name replaced by '_downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    samplingRate, samples = wavread(inputFile)
    downsampledSamples = hopSamples(samples, M)
    # The previous code passed the factor M itself as the sampling rate; the
    # output rate is the input rate divided by M (kept as an integer).
    wavwrite(downsampledSamples, samplingRate // M, inputFile.replace('.wav', '_downsampled.wav'))
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it with '.wav' in the
    name replaced by '_downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (fs, x) = wavread(inputFile)
    # integer division: '/' gives a float rate on Python 3, and the wav
    # header needs an integer sampling rate
    dsfs = fs // M
    new_array = x[::M]      # keep every M-th sample
    downsampled = inputFile.replace('.wav', '_downsampled.wav')
    wavwrite(new_array, dsfs, downsampled)
def testAudio ():
    """
    Write two seconds of a 30000 Hz sine, sampled at 44100 Hz, to 'testa.wav'.
    """
    samplingRate = 44100
    freq = 30000
    duration = 2.0
    # NOTE(review): 30000 Hz lies above half the sampling rate (22050 Hz), so
    # the written tone is aliased - presumably intended for this test
    n = np.arange(duration*samplingRate)
    wavwrite(np.sin(2*np.pi*freq*n/samplingRate), samplingRate, "testa.wav")
def downsampleAudio(inputFile,M):
    """
    Downsample a wav file by a factor of M and write it into
    '../../sounds/output_sounds/' as downsampled_<name>.
    Inputs:
        inputFile: file name of the wav file; the first 13 characters are
                   dropped to form the output name (presumably the
                   '../../sounds/' prefix - confirm with callers)
        M: downsampling factor (positive integer)
    """
    (fs, x) = UF.wavread(inputFile)
    # (a discarded 'x.astype(int)' call was removed: it had no effect)
    x_array_slice = np.array(x)[::M]    # keep every M-th sample
    name_and_path = '../../sounds/output_sounds/' + 'downsampled_' + inputFile[13:]
    # The decimated samples belong to a rate M times lower; the previous code
    # wrote them with the input rate. Integer division keeps the rate an int.
    UF.wavwrite(x_array_slice, fs // M, name_and_path)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input
    with '_downsampled' inserted before the extension.
    Inputs:
        inputFile: file name of the wav file (including path); it must
                   contain a '.' for the name to be split
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    basename, extension = inputFile.rsplit(".", 1)
    outputFile = basename + "_downsampled." + extension
    # integer division: '/' gives a float rate on Python 3, and the wav
    # header needs an integer sampling rate
    wavwrite(y, fs // M, outputFile)
def writeSound(y, fs, name):
    '''
    writes a constructed sound to the file called name
    a sound with a sampling rate of 44100 is written with the utilFunctions
    module, any other sampling rate is written through sf.write as 24 bit PCM.
    NOTE(review): the branch tests the sampling rate, not the bit depth of the sound.
    '''
    if fs != 44100:
        sf.write(name, y, fs, subtype="PCM_24")
        return
    UF.wavwrite(y, fs, name)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it with '.wav' in the
    name replaced by '_downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    # read once; the previous code read the whole file twice
    fs, x = wavread(inputFile)
    a = hopSamples(x, M)
    file_name = inputFile.replace('.wav', '_downsampled.wav')
    print(file_name)
    # integer division: '/' gives a float rate on Python 3
    wavwrite(a, fs // M, file_name)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input,
    with '_downsampled.wav' replacing the last four characters of the name.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    # '<>' and the print statement only exist in Python 2; these forms run on
    # both versions. The message is a warning only, processing continues.
    if fs != 44100:
        print("Sample rate must be 44100.")
    ds = hopSamples(x, M)
    # integer division: '/' gives a float rate on Python 3
    wavwrite(ds, fs // M, inputFile[:-4] + "_downsampled.wav")
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Stochastic model analysis/synthesis of a sound; saves the result in
    'output_sounds/' and shows input, stochastic envelope and output
    without blocking
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    """
    (fs, x) = UF.wavread(inputFile)
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')

    rate = float(fs)

    def plotSound(row, sound):
        # waveform panel shared by the input (row 1) and the output (row 3)
        plt.subplot(3,1,row)
        plt.plot(np.arange(sound.size)/rate, sound)
        plt.axis([0, sound.size/rate, min(sound), max(sound)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')

    plt.figure(figsize=(12, 9))

    plotSound(1, x)
    plt.title('input sound: x')

    # stochastic representation: frames along time, bins along frequency
    plt.subplot(3,1,2)
    numFrames = int(stocEnv[:,0].size)
    frmTime = H*np.arange(numFrames)/rate
    binFreq = np.arange(stocf*(N/2+1))*rate/(stocf*N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    plotSound(3, y)

    plt.tight_layout()
    plt.show(block=False)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to
    'test<M>_downsampled.wav' in the current working directory.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (sampleRate, dataArray) = wavread(inputFile)
    downSampleByM = dataArray[::M]      # keep every M-th sample
    # integer division: '/' gives a float rate on Python 3, and the wav
    # header needs an integer sampling rate
    outputRate = sampleRate // M
    wavwrite(downSampleByM, outputRate, 'test%s_downsampled.wav' %(M))
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to
    'output_downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    (fs, x) = wavread(inputFile)
    x = x[::M]      # keep every M-th sample
    # fs/float(M) always produced a float; the sampling rate stored in the
    # wav header has to be an integer, so use integer division
    fs = fs // M
    wavwrite(x, fs, 'output_downsampled.wav')
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input;
    everything from the last '.' of the name on is replaced by '_downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    stem = inputFile[:inputFile.rfind('.')]
    outputFile = stem + "_downsampled.wav"
    (fs, x) = wavread(inputFile)
    newRate = int(fs / M)       # sampling rate of the output file
    wavwrite(hopSamples(x, M), newRate, outputFile)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write the result to
    'downsampled.wav'.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    print('Reading file...')
    fs, x = wavread(inputFile)
    print('Sample rate: ', fs)
    print('Number of samples: ', len(x))
    y = x[::M]      # keep every M-th sample
    # integer division: '/' gives a float rate on Python 3, and the wav
    # header needs an integer sampling rate
    newFs = fs // M
    wavwrite(y, newFs, 'downsampled.wav')
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90, minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Harmonic model analysis and synthesis; the result is saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0, maxf0: minimum and maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound, fs: sampling rate, hfreq: harmonic frequencies, y: output sound
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: harmonics of the input sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesis from the detected harmonics
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModel.wav')
    return x, fs, hfreq, y
def _writeSineModelExample(inputFile, W, M, N, B, T, suffix):
    """Run Best.calculateAndUpdate on one sound and write the result next to the input file."""
    fs, x = UF.wavread(inputFile)
    Ns = 512
    best = Best()
    y = best.calculateAndUpdate(x, fs, Ns, np.array(W), np.array(M), np.array(N), np.array(B), np.array(T))
    # the last four characters of the input name (the extension) are replaced
    outputFile = inputFile[:-4] + suffix
    # single-argument print: same output on Python 2 and Python 3
    print('-> ' + outputFile)
    UF.wavwrite(y, fs, outputFile)


def writeExampleFiles():
    """
    A convenience function: writes out example files, some of them with
    optimal parameters found by exploreSineModelMultiRes()

    Two parameter sets (one single-band, one three-band) are applied to two
    sounds. Per the output names, each sound gets one '_optimizedSineModel.wav'
    and one '_nonOptimizedSineModel.wav' file, written next to the input.
    """
    orchestra = '../../sounds/orchestra.wav'
    strings = '../../sounds/121061__thirsk__160-link-strings-2-mono.wav'

    # (W, M, N, B, T): window types, window sizes, fft sizes, band edges, thresholds
    singleBand = (['blackmanharris'], [1001], [4096], [], [-90])
    threeBands = (['hamming','hamming','hamming'], [3001,1501,751], [16384,8192,4096], [2756.25,5512.5], [-90,-90,-90])

    examples = (
        (orchestra, singleBand, '_optimizedSineModel.wav'),
        (strings, threeBands, '_optimizedSineModel.wav'),
        (orchestra, threeBands, '_nonOptimizedSineModel.wav'),
        (strings, singleBand, '_nonOptimizedSineModel.wav'),
    )
    for inputFile, (W, M, N, B, T), suffix in examples:
        _writeSineModelExample(inputFile, W, M, N, B, T, suffix)
def downsampleAudio(inputFile, M):
    """
    Downsample a wav file by a factor of M and write it next to the input
    with '_downsampled' inserted before the extension.
    Inputs:
        inputFile: file name of the wav file (including path)
        M: downsampling factor (positive integer)
    """
    fs, x = wavread(inputFile)
    y = hopSamples(x, M)
    dirname, basename = os.path.split(inputFile)
    # splitext cuts at the last dot only; the previous split('.') unpacking
    # raised ValueError for names with several dots or without any dot
    stem, ext = os.path.splitext(basename)
    outputFile = os.path.join(dirname, stem + '_downsampled' + ext)
    wavwrite(y, int(fs / M), outputFile)
def main(inputFile='../../sounds/ocean.wav', H=256, N=512, stocf=.1):
    """
    Stochastic model analysis and synthesis; the result is saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    returns x: input sound, fs: sampling rate, stocEnv: stochastic envelope, y: output sound
    """
    (fs, x) = UF.wavread(inputFile)

    # analysis followed by synthesis from the stochastic envelope
    stocEnv = STM.stochasticModelAnal(x, H, N, stocf)
    y = STM.stochasticModelSynth(stocEnv, H, N)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_stochasticModel.wav')
    return x, fs, stocEnv, y
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal model analysis and synthesis; the result is saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, tfreq: sinusoidal frequencies, y: output sound
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: sinusoidal tracks of the input
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesis from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')
    return x, fs, tfreq, y
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90, minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, resynthesize it and save the
    result in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound, fs: sampling rate, hfreq: harmonic frequencies, y: output sound
    """
    # synthesis settings: fft size and hop size (has to be 1/4 of the fft size)
    Ns, H = 512, 128

    fs, x = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, analysisWindow, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # output name: base name of the input with its last four characters replaced
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)
    return x, fs, hfreq, y
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Sinusoidal plus residual analysis and synthesis; the sinusoidal part,
    the residual and their sum are saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound, fs: sampling rate, mXr: magnitude spectrogram of the residual,
            tfreq: sinusoidal frequencies, y: output sound
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: sinusoids plus residual, and the spectrogram of the residual
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)
    mXr, _ = STFT.stftAnal(xr, fs, w, N, H)

    # synthesis: sum of sinusoids and residual
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # written in the order sinusoidal part, residual, sum
    prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (ys, '_sprModel_sines.wav'),
        (xr, '_sprModel_residual.wav'),
        (y, '_sprModel.wav'),
    )
    for sound, suffix in outputs:
        UF.wavwrite(sound, fs, prefix + suffix)
    return x, fs, mXr, tfreq, y
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Harmonic plus residual analysis and synthesis; the harmonic part, the
    residual and their sum are saved in 'output_sounds/'
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0, maxf0: minimum and maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns x: input sound, fs: sampling rate, mXr: magnitude spectrogram of the residual,
            hfreq: harmonic frequencies, y: output sound
    """
    Ns = 512        # size of fft used in synthesis
    H = 128         # hop size (has to be 1/4 of Ns)

    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis: harmonics plus residual, and the spectrogram of the residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
    mXr, _ = STFT.stftAnal(xr, fs, w, N, H)

    # synthesis of the hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # written in the order harmonics, residual, sum
    prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (yh, '_hprModel_sines.wav'),
        (xr, '_hprModel_residual.wav'),
        (y, '_hprModel.wav'),
    )
    for sound, suffix in outputs:
        UF.wavwrite(sound, fs, prefix + suffix)
    return x, fs, mXr, hfreq, y
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
    """
    Analyze and resynthesize a sound with the harmonic plus stochastic model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation

    Writes the harmonic, stochastic and summed sounds under 'output_sounds/' and
    returns (x, fs, hfreq, stocEnv, y).
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                                    f0et, harmDevSlope, minSineDur, Ns, stocf)

    # synthesize a sound from the harmonic plus stochastic representation
    y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs)

    # all output files share the input's base name with the last 4 characters removed
    outputPrefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    outputs = (
        (yh, '_hpsModel_sines.wav'),
        (yst, '_hpsModel_stochastic.wav'),
        (y, '_hpsModel.wav'),
    )
    for sound, suffix in outputs:
        UF.wavwrite(sound, fs, outputPrefix + suffix)

    return x, fs, hfreq, stocEnv, y
# Script fragment: harmonic plus stochastic analysis, time scaling, synthesis,
# and the first plots. inputFile, window, M, N, t, nH, minf0, maxf0 and
# minSineDur are read here but assigned outside this fragment.

# remaining analysis parameters
f0et=5
harmDevSlope=0.01
stocf=0.1
# synthesis fft size and hop size
Ns = 512
H = 128

# read the input sound and build the analysis window
(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)
# harmonic plus stochastic analysis of the whole sound
hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)
# time scaling values handed to hpsTimeScale
# NOTE(review): presumably (input time, output time) pairs -- confirm against HPST.hpsTimeScale
timeScaling = np.array([0, 0, 2.138, 2.138-1.5, 3.146, 3.146])
yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreq, hmag, mYst, timeScaling)
# synthesize from the time-scaled representation; an empty array is passed in the phase argument
y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)
UF.wavwrite(y,fs, 'hps-transformation.wav')

plt.figure(figsize=(12, 9))
# upper frequency limit (Hz) for the plots
maxplotfreq = 14900.0

# plot the input sound
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x)
plt.axis([0, x.size/float(fs), min(x), max(x)])
plt.title('x (sax-phrase-short.wav')

# plot spectrogram of the stochastic component
plt.subplot(4,1,2)
# number of rows of mYst (one per analysis frame, given the time axis built from it elsewhere -- confirm)
numFrames = int(mYst[:,0].size)
def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
             minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
    """
    Analyze a sound with the harmonic plus stochastic model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation

    Side effects: writes '<input base name>_hpsModel.wav' under 'output_sounds/'
    and opens a non-blocking matplotlib figure with three subplots.

    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                 harmDevSlope, minSineDur, Ns, stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of the stochastic component
    plt.subplot(3,1,2)
    numFrames = int(mYst[:,0].size)
    sizeEnv = int(mYst[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # the slice bound must be an integer: a float bound raises TypeError in NumPy
    maxplotbin = int(sizeEnv*maxplotfreq/(.5*fs))
    stocPlot = mYst[:, :maxplotbin+1]
    # build the frequency axis from the slice actually plotted so both always have the same length
    binFreq = (.5*fs)*np.arange(stocPlot.shape[1])/sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocPlot))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq*np.less(hfreq, maxplotfreq)
        # zeros (absent or above-range harmonics) become NaN so they are not drawn
        harms[harms==0] = np.nan
        numFrames = int(harms[:,0].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag, mYst
def exploreSineModelMultiRes(inputFile='../../sounds/orchestra.wav'):
    """
    Search a grid of multi-resolution sine-model parameters and write the best result.

    inputFile (string) = wav file including the path

    Every candidate parameter set (window type, window sizes M, fft sizes N,
    band boundaries B, thresholds T, synthesis fft size Ns) is handed to
    best.calculateAndUpdate. Afterwards the parameters stored on `best` are
    printed, the sound is recomputed with them and written to
    '<inputFile without its last 4 characters>_optimizedSineModel.wav'.

    NOTE(review): the nesting of the `else` branch (paired here with
    `if scaledM < Ns`) and of the trailing `n = n * 2` (placed here at the end
    of each `nPower` iteration) should be confirmed against the project history.
    """
    fs, x = UF.wavread(inputFile)  # read input sound

    # First, let's check whether the new code returns same result as old one for mono-resolution case
    verifySineModelMultiRes()

    # Let's find optimal parameters in a reasonable range
    windows = ['hanning', 'hamming', 'blackman', 'blackmanharris']
    best = Best()
    for k in range(5, 80, 5):
        m = k * 100 + 1  # Window size in samples
        for window in windows:  # Window type
            for t in range(-90, -100, -10):  # Threshold (this range yields only -90)
                for Ns in [512]:  # size of fft used in synthesis
                    # smallest power of two not below m: size of fft used in analysis
                    n = 2
                    while n < m:
                        n = n * 2
                    for nPower in range(0, 3):  # try out the analysis window closest to window size, and some larger ones
                        for nAdditionalResolutions in range(0, 4):  # try out multi-resolution analysis windows
                            W = np.array([window])
                            M = np.array([m])
                            N = np.array([n])
                            B = np.array([])
                            T = np.array([t])
                            log_m = np.log(float(m))
                            log_n = np.log(float(n))
                            log_f = np.log(fs / 2.0)
                            log_step = np.log(2)
                            executeStep = True
                            continueAddingResolutions = True
                            for additionalResolution in range(0, nAdditionalResolutions):
                                if continueAddingResolutions:
                                    # halve the window size per extra resolution, keeping it odd
                                    scaledM = int(np.exp(log_m - log_step * (additionalResolution + 1)))
                                    if scaledM % 2 == 0:
                                        scaledM = scaledM + 1
                                    # halve the fft size too, but never below the window size
                                    scaledN = int(np.exp(log_n - log_step * (additionalResolution + 1)))
                                    if scaledN < scaledM:
                                        scaledN = scaledM
                                    # round the fft size up to a power of two
                                    appropriateScaledN = 2
                                    while appropriateScaledN < scaledN:
                                        appropriateScaledN = appropriateScaledN * 2
                                    frequencyBoundary = np.exp(
                                        log_f - (log_step * (nAdditionalResolutions - additionalResolution)))
                                    if scaledM < Ns:
                                        # window smaller than the synthesis fft: stop adding resolutions
                                        continueAddingResolutions = False
                                        if additionalResolution == 0:
                                            executeStep = False
                                    else:
                                        W = np.append(W, window)
                                        M = np.append(M, scaledM)
                                        N = np.append(N, appropriateScaledN)
                                        B = np.append(B, frequencyBoundary)
                                        T = np.append(T, t)
                            if executeStep:
                                best.calculateAndUpdate(x, fs, Ns, W, M, N, B, T)
                        n = n * 2

    # print() calls: the former print statements are a syntax error on Python 3
    print('FILE:', inputFile)
    print('BEST:', 'diff =', best.diff, 'for W =', best.W, ', M =', best.M, ', N =', best.N,
          ', B =', best.B, ', T =', best.T, ', Ns =', best.Ns)

    # recompute the sound for the best parameter set and write it next to the input file
    y_best = best.calculateAndUpdate(x, fs, best.Ns, best.W, best.M, best.N, best.B, best.T)
    outputFile = inputFile[:-4] + '_optimizedSineModel.wav'
    UF.wavwrite(y_best, fs, outputFile)
# Script fragment: x, fs, w, N, H and mX are read here but assigned outside it.

# compute the F0 and the harmonics
t = -97
minf0 = 310
maxf0 = 450
f0et = 4
nH = 70
harmDevSlope = 0.01
Ns = H * 4
minSineDur = 0.3
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

# copy of the harmonic frequencies with every column but the first zeroed (fundamental only)
hfreqt = copy.copy(hfreq)
hfreqt[:, 1:] = 0

# synthesize the fundamental alone (scaled by 4) and the full set of harmonics
yf0 = 4 * SM.sineModelSynth(hfreqt, hmag, hphase, Ns, H, fs)
yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
UF.wavwrite(yf0, fs, "cello-phrase-f0.wav")
UF.wavwrite(yh, fs, "cello-phrase-harmonics.wav")

# plot the F0 on top of the spectrogram
plt.figure(3, figsize=(16, 4.5))
maxplotfreq = 5000.0
harms = hfreq * np.less(hfreq, maxplotfreq)
# rows whose first column is 0 are set to NaN so those frames are not drawn
harms[harms[:, 0] == 0] = np.nan
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
# the slice bound must be an integer: a float bound raises TypeError in NumPy
maxplotbin = int(N * maxplotfreq / fs)
binFreq = fs * np.arange(maxplotbin + 1) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, : maxplotbin + 1]))
plt.plot(frmTime, harms[:, 0], linewidth=3, color="0")
plt.xlabel("time (sec)")
plt.ylabel("frequency (Hz)")
plt.title("spectrogram + fundamental frequency")
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model and plot the result

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Writes the harmonic, residual and summed sounds under 'output_sounds/',
    then shows a three-panel figure. Returns nothing.
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH,
                                               minf0, maxf0, f0et, harmDevSlope)

    # spectrogram of the residual (the second output is not used here)
    mXr, _ = STFT.stftAnal(xr, w, N, H)

    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # write sound files for harmonics, residual, and the sum
    outputPrefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    UF.wavwrite(yh, fs, outputPrefix + '_hprModel_sines.wav')
    UF.wavwrite(xr, fs, outputPrefix + '_hprModel_residual.wav')
    UF.wavwrite(y, fs, outputPrefix + '_hprModel.wav')

    rate = float(fs)
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, x.size/rate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: magnitude spectrogram of the residual up to maxplotfreq
    plt.subplot(3,1,2)
    maxplotbin = int(N*maxplotfreq/fs)
    residualTime = H*np.arange(int(mXr[:,0].size))/rate
    binFreq = np.arange(maxplotbin+1)*rate/N
    plt.pcolormesh(residualTime, binFreq, np.transpose(mXr[:,:maxplotbin+1]))
    plt.autoscale(tight=True)

    # harmonic frequencies drawn over the residual spectrogram
    if hfreq.shape[1] > 0:
        harmTracks = hfreq*np.less(hfreq, maxplotfreq)
        harmTracks[harmTracks==0] = np.nan
        harmTime = H*np.arange(int(harmTracks[:,0].size))/rate
        plt.plot(harmTime, harmTracks, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # panel 3: output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
# Script fragment: plotting tail of a time-scaling example. maxplotbin, frmTime,
# mX, ytfreq, mY, y, fs, N, H and maxplotfreq are first read here with values
# assigned outside this fragment.

# spectrogram mX limited to the first maxplotbin+1 bins
binFreq = np.arange(maxplotbin+1)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:maxplotbin+1]))
plt.autoscale(tight=True)

# third panel: time-scaled sine frequencies, then the spectrogram mY
plt.subplot(4,1,3)
numFrames = int(ytfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
# keep only frequencies below maxplotfreq; non-positive entries become NaN so they are not drawn
tracks = ytfreq*np.less(ytfreq, maxplotfreq)
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
plt.autoscale(tight=True)
plt.title('mY + time-scaled sine frequencies')

# spectrogram mY limited to the bins at or below maxplotfreq
maxplotbin = int(N*maxplotfreq/fs)
numFrames = int(mY[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(maxplotbin+1)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:,:maxplotbin+1]))
plt.autoscale(tight=True)

# fourth panel: output sound
plt.subplot(4,1,4)
plt.plot(np.arange(y.size)/float(fs), y, 'b')
plt.axis([0,y.size/float(fs),min(y),max(y)])
plt.title('y')

# write the sound and the figure, then display
plt.tight_layout()
UF.wavwrite(y, fs, 'mridangam-sineModelTimeScale.wav')
plt.savefig('sineModelTimeScale-mridangam.png')
plt.show()
# Script fragment: plotting tail of an STFT morph example. mX, mX2, mY, y, fs,
# H1, N1 and maxplotfreq are read here but assigned outside this fragment.

# first panel: spectrogram mX up to maxplotfreq
plt.subplot(311)
numFrames = int(mX[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)
binFreq = fs*np.arange(N1*maxplotfreq/fs)/N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:int(N1*maxplotfreq/fs+1)]))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

# second panel: spectrogram mX2 up to maxplotfreq
plt.subplot(312)
numFrames = int(mX2[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)
# N is set to twice the number of columns of mX2
# NOTE(review): presumably an estimate of the fft size used for mX2 -- confirm
N = 2*mX2[0,:].size
binFreq = fs*np.arange(N*maxplotfreq/fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX2[:,:int(N*maxplotfreq/fs+1)]))
plt.title('mX2 (speech-male.wav)')
plt.autoscale(tight=True)

# third panel: spectrogram mY up to maxplotfreq
plt.subplot(313)
numFrames = int(mY[:,0].size)
frmTime = H1*np.arange(numFrames)/float(fs)
binFreq = fs*np.arange(N1*maxplotfreq/fs)/N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:,:int(N1*maxplotfreq/fs+1)]))
plt.title('mY')
plt.autoscale(tight=True)

# write the sound and the figure, then display
plt.tight_layout()
UF.wavwrite(y, fs, 'orchestra-speech-stftMorph.wav')
plt.savefig('stftMorph-orchestra.png')
plt.show()
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Perform analysis/synthesis using the sinusoidal model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Writes '<input base name>_sineModel.wav' under 'output_sounds/' and shows a
    non-blocking three-panel figure. Returns nothing.
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    UF.wavwrite(y, fs, 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    rate = float(fs)
    inputDuration = x.size/rate
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal frequencies (left empty when there are no tracks)
    plt.subplot(3,1,2)
    if tfreq.shape[1] > 0:
        trackTime = H*np.arange(tfreq.shape[0])/rate
        # non-positive frequencies are overwritten in place with NaN so they are not drawn
        tfreq[tfreq<=0] = np.nan
        plt.plot(trackTime, tfreq)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
def estimateF0(inputFile = '../../sounds/cello-double-2.wav'):
    """
    Function to estimate fundamental frequency (f0) in an audio signal. This function also plots the
    f0 contour on the spectrogram and synthesizes the f0 contour.

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values, with every
                          frame before 0.5 s and from 4.0 s onwards set to 0

    Side effects: writes 'synthF0Contour.wav' and either shows the plot (plot == 1) or saves it
    to 'f0_over_Spectrogram.png'.
    """

    ### Change these analysis parameter values
    window = "blackman"
    M = 4401
    N = 8192
    f0et = 7
    t = -90.0
    minf0 = 140
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                                     # fix hop size

    fs, x = UF.wavread(inputFile)                               # reading inputFile
    w = get_window(window, M)                                   # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # np.floor/np.ceil return floats; convert to int because slice bounds must be integers
    startFrame = int(np.floor(0.5*fs/H))
    endFrame = int(np.ceil(4.0*fs/H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0
    y = UF.sinewaveSynth(f0, 0.8, H, fs)
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    # frequency range to plot
    maxplotfreq = 500.0
    fontSize = 16
    plot = 1    # plot = 1 plots the f0 contour, otherwise saves it to a file.

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, fs, w, N, H)                      # using same params as used for analysis
    mX = np.transpose(mX[:,:int(N*(maxplotfreq/fs))+1])

    timeStamps = np.arange(mX.shape[1])*H/float(fs)
    binFreqs = np.arange(mX.shape[0])*fs/float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color = 'k', linewidth=1.5)
    # vertical markers at the 0.5 s and 4.0 s limits used above
    plt.plot([0.5, 0.5], [0, maxplotfreq], color = 'b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color = 'b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize = fontSize)
    plt.xlabel('Time (s)', fontsize = fontSize)
    plt.legend(('f0',))

    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1]-xLim[0])/(2.0*(yLim[1]-yLim[0])))

    if plot == 1:
        plt.autoscale(tight=True)
        plt.show()
    else:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Writes '<input base name>_harmonicModel.wav' under 'output_sounds/' and shows a
    non-blocking figure.

    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # compute the harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur)

    # synthesize the sines without original phases (an empty array is passed as the phases)
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # write the synthesized sound
    UF.wavwrite(y, fs, 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModel.wav')

    rate = float(fs)
    inputDuration = x.size/rate
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: harmonic tracks, drawn only when there is at least one track
    if hfreq.shape[1] > 0:
        plt.subplot(3,1,2)
        harmTracks = np.copy(hfreq)   # copy so the returned hfreq is left untouched
        trackTime = H*np.arange(harmTracks.shape[0])/rate
        harmTracks[harmTracks<=0] = np.nan
        plt.plot(trackTime, harmTracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # panel 3: output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
# Script fragment: plotting tail of an STFT example. x, y, mX, pX, fs, N and H
# are read here but assigned outside this fragment.

# axis limits for the plot of x started above this fragment
plt.axis([0,x.size/float(fs),min(x),max(x)])

# second panel: full magnitude spectrogram mX
plt.subplot(412)
numFrames = int(mX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(mX[0,:].size)*float(fs)/N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

# third panel: first difference of pX along the second (column) axis of pX
plt.subplot(413)
numFrames = int(pX[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
binFreq = np.arange(pX[0,:].size)*float(fs)/N
# np.diff yields one row fewer than binFreq has entries
plt.pcolormesh(frmTime, binFreq, np.diff(np.transpose(pX),axis=0))
plt.title('pX derivative, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

# fourth panel: output sound
plt.subplot(414)
plt.plot(np.arange(y.size)/float(fs), y,'b')
plt.axis([0,y.size/float(fs),min(y),max(y)])
plt.title('y')

# save the figure, write the sound, then display
plt.tight_layout()
plt.savefig('stft-system.png')
UF.wavwrite(y, fs, 'piano-stft.wav')
plt.show()
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                             freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
                             freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]),
                             timbrePreservation = 1,
                             timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146])):
    """
    transform the analysis values returned by the analysis function and synthesize the sound

    inputFile: name of input file
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs

    Side effects: writes '<input base name>_hpsModelTransformation.wav' under 'output_sounds/'
    and shows a two-panel figure. Returns nothing.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling)

    # synthesis from the transformed hps representation (an empty array is passed as the phases)
    y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic component
    plt.subplot(2,1,1)
    numFrames = int(ystocEnv[:,0].size)
    sizeEnv = int(ystocEnv[0,:].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # the slice bound must be an integer: a float bound raises TypeError in NumPy
    maxplotbin = int(sizeEnv*maxplotfreq/(.5*fs))
    stocPlot = ystocEnv[:, :maxplotbin+1]
    # build the frequency axis from the slice actually plotted so both always have the same length
    binFreq = (.5*fs)*np.arange(stocPlot.shape[1])/sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocPlot))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = yhfreq*np.less(yhfreq, maxplotfreq)
        # zeros (absent or above-range harmonics) become NaN so they are not drawn
        harms[harms==0] = np.nan
        numFrames = int(harms[:,0].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2,1,2)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Run a sinusoidal-model analysis/synthesis with fixed parameters and plot the result.

    Input:
        inputFile (string) = wav file including the path
    Output:
        return True

    Writes '<input base name>_sineModel.wav' to the current directory and shows a
    three-panel figure.
    Discuss on the forum!
    """
    # analysis parameters
    window = 'hamming'      # Window type
    M = 3001                # Window size in sample
    N = 4096                # FFT Size
    t = -80                 # Threshold
    minSineDur = 0.02       # minimum duration of a sinusoid
    maxnSines = 15          # Maximum number of sinusoids at any time frame
    freqDevOffset = 10      # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001    # slope increase of minimum frequency deviation

    # synthesis parameters
    Ns = 512                # size of fft used in synthesis
    H = 128                 # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    UF.wavwrite(y, fs, os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    rate = float(fs)
    inputDuration = x.size/rate
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0    # frequency range to plot

    # panel 1: input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal frequencies (left empty when there are no tracks)
    plt.subplot(3,1,2)
    if tfreq.shape[1] > 0:
        trackTime = H*np.arange(tfreq.shape[0])/rate
        # non-positive frequencies are overwritten in place with NaN so they are not drawn
        tfreq[tfreq<=0] = np.nan
        plt.plot(trackTime, tfreq)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
def main(inputFile = '../../sounds/piano.wav', window = 'hamming', M = 1024, N = 1024, H = 512):
    """
    analysis/synthesis using the STFT

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)

    Side effects: writes '<input base name>_stft.wav' under 'output_sounds/' and shows a
    four-panel figure. Returns nothing.
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = STFT.stftAnal(x, w, N, H)

    # perform the inverse stft
    y = STFT.stftSynth(mX, pX, M, H)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_stft.wav'

    # write the sound resulting from the inverse stft
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0
    # highest bin to plot; must be an int because a float slice bound raises TypeError in NumPy
    maxplotbin = int(N*maxplotfreq/fs)

    # plot the input sound
    plt.subplot(4,1,1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4,1,2)
    numFrames = int(mX[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    magPlot = mX[:, :maxplotbin+1]
    # frequency axis built from the plotted slice so both always have the same length
    binFreq = fs*np.arange(magPlot.shape[1])/N
    plt.pcolormesh(frmTime, binFreq, np.transpose(magPlot))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram
    plt.subplot(4,1,3)
    numFrames = int(pX[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # np.diff along the frequency axis yields one column fewer than the slice it is given
    phasePlot = np.diff(pX[:, :maxplotbin+1], axis=1)
    binFreq = fs*np.arange(phasePlot.shape[1])/N
    plt.pcolormesh(frmTime, binFreq, np.transpose(phasePlot))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4,1,4)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Analyze a sound with the sine model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Writes '<input base name>_sineModel.wav' under 'output_sounds/' and shows a
    non-blocking figure.

    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512   # size of fft used in synthesis
    H = 128    # hop size (has to be 1/4 of Ns)

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)

    # synthesize the sines without original phases (an empty array is passed as the phases)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the synthesized sound
    UF.wavwrite(y, fs, 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    rate = float(fs)
    inputDuration = x.size/rate
    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0   # frequency range to plot

    # panel 1: input sound
    plt.subplot(3,1,1)
    plt.plot(np.arange(x.size)/rate, x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal tracks, drawn only when there is at least one track
    if tfreq.shape[1] > 0:
        plt.subplot(3,1,2)
        sineTracks = np.copy(tfreq)   # copy so the returned tfreq is left untouched
        # zero out frequencies at or above maxplotfreq, then hide all non-positive values
        sineTracks = sineTracks*np.less(sineTracks, maxplotfreq)
        sineTracks[sineTracks<=0] = np.nan
        trackTime = H*np.arange(int(sineTracks[:,0].size))/rate
        plt.plot(trackTime, sineTracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output sound
    plt.subplot(3,1,3)
    plt.plot(np.arange(y.size)/rate, y)
    plt.axis([0, y.size/rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag