def detect_peaks(self, min_peak_ratio=0.15):
    """--------------------------------------------------------------------
    Finds the peak indices of the distribution. These are treated as tonic
    candidates in higher order functions.

    min_peak_ratio: The minimum ratio between the value of a detected peak
                    and the max peak value. Must lie in [0, 1]:
                    0 keeps all peaks, 1 keeps only the highest peak.

    Returns a tuple (peak_inds, peak_vals): integer indices into self.bins
    and the corresponding peak values. Both are empty if no peak is found.
    --------------------------------------------------------------------"""
    assert 1 >= min_peak_ratio >= 0, \
        'min_peak_ratio should be between 0 (keep all peaks) and ' \
        '1 (keep only the highest peak)'

    # Peak detection is handled by Essentia
    detector = std.PeakDetection()
    peak_bins, peak_vals = detector(essentia.array(self.vals))
    peak_vals = np.asarray(peak_vals)

    # Essentia normalizes the positions to 1, they are converted here
    # to actual index values to be used in bins.
    last_bin = len(self.bins) - 1
    peak_inds = np.array([int(round(bn * last_bin)) for bn in peak_bins],
                         dtype=int)

    # Nothing detected: return the empty arrays instead of failing on
    # peak_inds[0] / max() of an empty sequence below.
    if peak_inds.size == 0:
        return peak_inds, peak_vals

    # If the object is a pcd and there is a peak at the zeroth index, the
    # same peak also shows up at the last index because a pcd is circular.
    # Drop the lower of the two. The duplicate is only removed when a peak
    # really sits in the last bin, so a lone peak at index 0 (or an
    # unrelated last peak) is never thrown away.
    is_wrapped = (peak_inds.size > 1 and peak_inds[0] == 0 and
                  peak_inds[-1] == last_bin)
    if is_wrapped and self.is_pcd():
        if peak_vals[0] >= peak_vals[-1]:
            peak_inds = peak_inds[:-1]
            peak_vals = peak_vals[:-1]
        else:
            peak_inds = peak_inds[1:]
            peak_vals = peak_vals[1:]

    # remove peaks lower than the min_peak_ratio
    peak_bool = peak_vals / max(peak_vals) >= min_peak_ratio
    return peak_inds[peak_bool], peak_vals[peak_bool]
def vibFreq(pitchtrack, sp, hopsize):
    '''
    Estimate the dominant modulation frequency of a pitch track.

    The pitch track is zero-padded to the next power of two, its magnitude
    spectrum is computed and the strongest spectral peak is converted to Hz.

    :param pitchtrack: numpy array of pitch values (converted to float32)
    :param sp: samplerate of wave audio
    :param hopsize: hop between consecutive pitch values
                    (presumably in audio samples -- confirm with caller)
    :return: frequency of the strongest of the (up to 3) detected peaks
    :raises IndexError: if no peak is detected in the spectrum
    '''
    if pitchtrack.dtype != np.float32:
        pitchtrack = pitchtrack.astype(np.float32)

    # Rate of the pitch track itself. float() forces true division, which
    # Python 2 would otherwise truncate for integer sp / hopsize.
    sampleRate = sp / float(hopsize)

    ptlen = len(pitchtrack)
    fftSize = int(pow(2, ceil(log(ptlen) / log(2))))  # next pow of pitchtrack length

    pitchtrackPad = pitchtrack
    if ptlen < fftSize:
        pitchtrackPad = np.append(
            pitchtrack, np.zeros(fftSize - ptlen, dtype=np.float32))

    S = ess.Spectrum(size=fftSize)(pitchtrackPad)
    locs, _ = ess.PeakDetection(maxPeaks=3, orderBy='amplitude')(S)

    # PeakDetection positions are normalised to [0, 1] over the input array,
    # i.e. 1.0 is the last spectrum bin (index fftSize / 2), whose frequency
    # is sampleRate / 2. Hence Hz = loc * (fftSize / 2) * sampleRate / fftSize.
    freqs = locs * (sampleRate / 2.0)
    return freqs[0]
def detect_peaks(self):
    """
    Detect the peaks of the distribution stored in self.vals.

    Returns a tuple (peak_idxs, peak_vals) of numpy arrays: integer indices
    into self.bins and the corresponding peak values. Both are empty when
    no peak is detected.
    """
    detector = std.PeakDetection()
    peak_bins, peak_vals = detector(essentia.array(self.vals))
    peak_vals = np.asarray(peak_vals)

    # Essentia normalizes the positions to 1; scale back to bin indices.
    # Indices are cast to int so they can be used directly for indexing.
    last_bin = len(self.bins) - 1
    peak_idxs = np.array([int(round(bn * last_bin)) for bn in peak_bins],
                         dtype=int)

    # A peak at index 0 is reported a second time in the last bin when the
    # distribution wraps around; drop that duplicate. The check on the last
    # index (and on having more than one peak) prevents removing a genuine,
    # unrelated peak or the only peak found.
    if (peak_idxs.size > 1 and peak_idxs[0] == 0 and
            peak_idxs[-1] == last_bin):
        peak_idxs = peak_idxs[:-1]
        peak_vals = peak_vals[:-1]

    return peak_idxs, peak_vals
def testSyntheticNoveltyCurve(self):
    # In this test we assume the noveltyCurve is a perfect (half-wave
    # rectified) sine. The algorithm should reproduce a sinusoid with peaks
    # at the same positions as those of the fake novelty curve. Plotting may
    # help to understand...
    frameSize = 4  # 4 seconds
    overlap = 2
    frameRate = 44100 / 128.

    # start from 10 Hz and rescale by frameRate / (next power of two of the
    # frame length in samples)
    f = 10 * float(frameRate) / nextPowerTwo(
        int(ceil(frameSize * frameRate)))
    expectedBpm = f * 60.  # 101Bpm

    length = 3000
    phase = 0.135 * numpy.pi
    noveltyCurve = []
    for x in range(length):
        sample = numpy.sin(2.0 * numpy.pi * f * x / frameRate + phase)
        # keep only the positive half of the sine
        noveltyCurve.append(0 if sample < 0 else sample)

    pool = self.computeBpmHistogram(noveltyCurve, frameSize, overlap,
                                    frameRate)

    #plot(noveltyCurve)
    #plot(pool['sinusoid'],'r')
    #show()

    noveltyPeaks = std.PeakDetection(interpolate=False)(noveltyCurve)[0]

    # depending on the framesize, hopsize, etc. the sinusoid is usually
    # larger than the novelty curve, so we need to trim the sinusoid's
    # peaks.
    # The original also computed the sinusoid peaks with interpolate=False
    # first, but that result was overwritten by this one before being used,
    # so only the effective computation is kept.
    sinusoidPeaks = std.PeakDetection()(
        pool['sinusoid'])[0][:len(noveltyPeaks)]

    for p1, p2 in zip(noveltyPeaks, sinusoidPeaks):
        self.assertAlmostEqual(fabs(p1 - p2), 0, 5e-2)

    self.assertAlmostEqual(pool['bpm'], expectedBpm, 1e-3)
def mainFunction(feature, spec, varin):
    '''
    main procedure of algorithm

    For each of the first ``max_band`` feature columns: smooth and normalise
    the column, fit a Legendre polynomial of order 3 around every frame,
    detect peaks/valleys of the normalised 1st and 2nd coefficients and mark
    the frames returned by BE_change. The per-band marks are summed and
    turned into phone boundaries by phoneBoundaryStep1 (optionally refined
    by phoneBoundaryStep2).

    :param feature: observation * features. NOTE: the first ``max_band``
                    columns are overwritten in place (filtered + normalised).
    :param spec: spectrogram, only used when varin['plot'] is true
    :param varin: parameter dictionary.
                  Required keys: fs, framesize, hopsize, th_phone, q, k,
                  delta, step2, plot, energy.
                  Optional keys (default): max_band (8), l (2), h0 (0.6),
                  h1 (0.08), h2 (0.0725).
    :return: array of boundary times, i.e. the boundaries multiplied by
             hopsize / fs
    '''
    fs = varin['fs']
    framesize = varin['framesize']
    hopsize = varin['hopsize']

    # optional parameters; a missing key falls back to the default
    max_band = varin.get('max_band', 8)
    half_win = varin.get('l', 2)      # tried values: [2,4,6,8,10]
    h0 = varin.get('h0', 0.6)
    h1 = varin.get('h1', 0.08)        # tried values: [0.6,0.8,1.0]
    h2 = varin.get('h2', 0.0725)      # tried values: [0.5,0.6,0.7,0.8,0.9,1.0]

    th_phone = varin['th_phone']
    q = varin['q']
    k = varin['k']
    delta = varin['delta']
    step2 = varin['step2']
    plot = varin['plot']
    energy = varin['energy']

    M = 3  # legendre order
    PEAK = ess.PeakDetection(interpolate=False, maxPeaks=99999)

    T = feature.shape[0]  # time
    frame_axis = np.arange(T)

    legCoefs = np.zeros(shape=(T, M + 1))
    G = np.zeros(shape=(max_band, T))
    m_a0 = np.zeros(shape=(max_band, T))
    th_e = np.mean(energy) / 100

    # kept for the diagnostic plot; stay None if no band could be analysed
    a1 = p_a1 = a_a1 = None

    for ii in range(max_band):
        # triangular filtering
        feature[:, ii] = triangularFilter(feature[:, ii])

        # normalizing
        band = feature[:, ii]
        feature[:, ii] = (band - min(band)) / (max(band) - min(band))

        # segmentation, fitting legendre polynomial
        for jj in range(half_win, T - half_win - 1):
            f = np.zeros(shape=(T, 1))
            seg = feature[jj - half_win:jj + half_win + 1, ii]
            seg = (seg - min(seg)) / (max(seg) - min(seg))
            f[jj - half_win:jj + half_win + 1, 0] = (seg - 0.5) * 2

            # legendre coef
            coef = np.polynomial.legendre.legfit(frame_axis, f, M)
            legCoefs[jj, :] = coef.transpose()

        m_a0[ii, :] = legCoefs[:, 0]

        c1 = legCoefs[:, 1]
        c2 = legCoefs[:, 2]
        # a constant coefficient track cannot be normalised: skip the band
        if max(c1) == min(c1) or max(c2) == min(c2):
            continue
        a1 = (c1 - min(c1)) / (max(c1) - min(c1))
        a2 = (c2 - min(c2)) / (max(c2) - min(c2))

        # detect peaks of a1 (positions come back normalised to [0, 1])
        p_a1, a_a1 = PEAK(np.array(a1, dtype=np.float32))
        p_a1 = np.array(np.round(p_a1 * (T - 1)), dtype=int)

        # remove peaks which is silence (low energy) or too weak
        keep = np.array(
            [not (energy[frame] < th_e or amp < h1)
             for frame, amp in zip(p_a1, a_a1)], dtype=bool)
        p_a1 = p_a1[keep]
        a_a1 = a_a1[keep]

        # detect valleys of a2
        p_a2_v, a_a2_v = PEAK(np.array(1 - a2, dtype=np.float32))
        p_a2_v = np.array(np.round(p_a2_v * (T - 1)), dtype=int)

        # detect peaks of a2
        p_a2_p, a_a2_p = PEAK(np.array(a2, dtype=np.float32))
        p_a2_p = np.array(np.round(p_a2_p * (T - 1)), dtype=int)

        # BE change
        if len(p_a1) and len(p_a2_p):
            be_change_ii = BE_change(p_a1, p_a2_p, p_a2_v, feature[:, ii],
                                     h2=h2, frame_interval=5)
            G[ii, be_change_ii] = 1

    # equation 5
    g = np.sum(G, axis=0)
    phoneBoundary = phoneBoundaryStep1(g)

    if step2:
        d = phoneBoundaryStep2(phoneBoundary, m_a0, hopsize, fs, th_phone,
                               q, k, delta)
        p_d, a_d = PEAK(np.array(d[:, 0], dtype=np.float32))
        # NOTE(review): p_d is a normalised position in [0, 1] and is
        # appended unscaled, unlike the peaks above which are multiplied by
        # (T - 1). Confirm whether a rescaling to frame indices is missing.
        for pos, amp in zip(p_d, a_d):
            if amp > h0:
                phoneBoundary.append(pos)

    sec_per_frame = hopsize / float(fs)
    phoneBoundary_time = np.array(phoneBoundary) * sec_per_frame

    if plot:
        if a1 is not None:
            # coefficient track and surviving peaks of the last analysed band
            plt.figure()
            plt.plot(a1)
            plt.stem(p_a1, a_a1)
            plt.show()

        # FFT size of the spectrogram; the original referenced an undefined
        # N here. 2 * framesize matches the sibling routines in this code.
        N = 2 * framesize
        mX = np.transpose(spec)
        maxplotfreq = 16001.0
        eps = np.finfo(float).eps
        mXPlot = mX[:int(N * (maxplotfreq / fs)) + 1, :]
        binFreqs = np.arange(mXPlot.shape[0]) * fs / float(N)
        timestamps = np.arange(mXPlot.shape[1]) * sec_per_frame

        plt.figure()
        plt.pcolormesh(timestamps, binFreqs, 20 * np.log10(mXPlot + eps))
        plt.title('Hoang')
        plt.xlabel('time(s)')
        plt.ylabel('freq')
        # a separate name is used so the returned array is not overwritten
        # by the last scalar boundary time
        for boundary in phoneBoundary:
            plt.axvline(boundary * sec_per_frame)
        plt.show()

    return phoneBoundary_time
# matplotlib without any blocking GUI import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt import numpy as np from smst.utils import audio (fs, x) = audio.read_wav('../../../sounds/piano.wav') start = 13860 M = 800 xp = x[start:start + M] / float(max(x[start:start + M])) r = ess.AutoCorrelation(normalization='standard')(xp) r = r / max(r) peaks = ess.PeakDetection(threshold=.11, interpolate=False, minPosition=.01)(r) plt.figure(1, figsize=(9, 7)) plt.subplot(211) plt.plot(np.arange(M) / float(fs), xp, lw=1.5) plt.axis([0, (M - 1) / float(fs), min(xp), max(xp)]) plt.xlabel('time (sec)') plt.ylabel('amplitude') plt.title('x (piano.wav)') plt.subplot(212) plt.plot(np.arange(M) / float(fs), r, 'r', lw=1.5) plt.plot(peaks[0] * (M - 1) / float(fs), peaks[1], 'x', color='k',
def plot(pool, title, outputfile='out.svg', subplot=111):
    ''' plots bars for each beat

    Builds five stacked subplots from the pool and saves them to outputfile:
    per-band autocorrelation image, summed correlation (as opacity),
    harmonics histogram, summed correlation (as height) and the weighted
    histogram. Diagnostic values are printed along the way.

    :param pool: mapping providing 'loudness' (one value per tick) and
                 'loudnessBandRatio' (ticks x bands array)
    :param title: unused, kept for interface compatibility
    :param outputfile: path of the figure written by pyplot.savefig
    :param subplot: unused, kept for interface compatibility
    :return: None
    '''
    barSize = 0.8

    def _alpha(val, peak):
        # Opacity of a bar relative to the highest bar, clamped to [0, 1].
        # A zero peak (e.g. silent input) would otherwise divide by zero.
        if peak == 0:
            return 0
        return max(0, min(val / peak, 1))

    loudness = pool['loudness']
    loudnessBand = pool['loudnessBandRatio']  # ticks x bands

    medianRatiosPerTick = [median(energy) for energy in loudnessBand]
    meanRatiosPerTick = [mean(energy) for energy in loudnessBand]

    loudnessBand = copy.deepcopy(loudnessBand.transpose())  # bands x ticks
    nticks = len(loudness)

    # windowed median / mean of every band, starting at each tick
    medianRatiosPerBand = []
    meanRatiosPerBand = []
    for band in loudnessBand:
        medians = [0] * nticks
        means = [0] * nticks
        for idxTick in range(nticks):
            start = idxTick
            end = start + BEATWINDOW
            if end > nticks:
                # NOTE(review): near the end the window becomes `howmuch`
                # ticks wide and stops before the last tick; kept as in the
                # original, confirm this is intended.
                howmuch = end - nticks
                end = nticks - 1
                start = max(end - howmuch, 0)
            window = band[start:end]
            medians[idxTick] = median(window)
            means[idxTick] = mean(window)
        medianRatiosPerBand.append(medians)
        meanRatiosPerBand.append(means)

    # zero the ratios below both the band and the tick threshold, weight the
    # remaining ones by the loudness of their tick
    for iBand, band in enumerate(loudnessBand):
        for tick, ratio in enumerate(band):
            bandThreshold = max(medianRatiosPerBand[iBand][tick],
                                meanRatiosPerBand[iBand][tick])
            tickThreshold = max(medianRatiosPerTick[tick],
                                meanRatiosPerTick[tick])
            if ratio < bandThreshold and ratio <= tickThreshold:
                loudnessBand[iBand][tick] = 0
            else:
                loudnessBand[iBand][tick] *= loudness[tick]

    acorr = std.AutoCorrelation()
    bandCorr = []
    maxCorr = []
    for band in loudnessBand:
        bandCorr.append(acorr(essentia.array(band)))
        # lags 0 and 1 are ignored when looking for the maximum
        maxCorr.append(argmax(bandCorr[-1][2:]) + 2)

    # use as much window space as possible:
    pyplot.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95)

    pyplot.subplot(511)
    pyplot.imshow(bandCorr, cmap=pyplot.cm.hot, aspect='auto',
                  origin='lower', interpolation='nearest')
    # print(...) with one pre-formatted string gives the same output under
    # Python 2 and Python 3
    print('max correlation %s' % (maxCorr,))

    sumCorr = [sum(band[tick] for band in bandCorr)
               for tick in range(nticks)]
    sumCorr[0] = 0
    sumCorr[1] = 0

    pyplot.subplot(512)
    maxAlpha = max(sumCorr)
    for i, val in enumerate(sumCorr):
        pyplot.bar(i, 1, barSize, align='edge', bottom=0,
                   alpha=_alpha(val, maxAlpha), color='r', edgecolor='w',
                   linewidth=.3)
    print('max sum correlation %s' % (argmax(sumCorr[2:]) + 2,))

    hist = getHarmonics(sumCorr)
    maxHist = argmax(hist)
    print('max histogram %s' % (maxHist,))

    pyplot.subplot(513)
    for i, val in enumerate(hist):
        pyplot.bar(i, val, barSize, align='edge', bottom=0, color='r',
                   edgecolor='w', linewidth=.3)

    # range = len - 1 makes PeakDetection report positions as indices
    peakDetect = std.PeakDetection(maxPeaks=5, orderBy='amplitude',
                                   minPosition=0,
                                   maxPosition=len(sumCorr) - 1,
                                   range=len(sumCorr) - 1)
    peaks = peakDetect(sumCorr)[0]
    peaks = [round(x + 1e-15) for x in peaks]
    print('Peaks: %s' % (peaks,))

    pyplot.subplot(514)
    maxAlpha = max(sumCorr)
    for i, val in enumerate(sumCorr):
        pyplot.bar(i, val, barSize, align='edge', bottom=0,
                   alpha=_alpha(val, maxAlpha), color='r', edgecolor='w',
                   linewidth=.3)

    # multiply both histogram and sum corr to have a weighted histogram:
    wHist = essentia.array(hist) * sumCorr * acorr(loudness)
    maxHist = argmax(wHist)
    print('max weighted histogram %s' % (maxHist,))

    pyplot.subplot(515)
    maxAlpha = max(wHist)
    for i, val in enumerate(wHist):
        pyplot.bar(i, val, barSize, align='edge', bottom=0,
                   alpha=_alpha(val, maxAlpha), color='r', edgecolor='w',
                   linewidth=.3)

    pyplot.savefig(outputfile, dpi=300)
    #pyplot.show()
    return
def mainFunction(feature,spec,varin):
    '''
    main procedure of algorithm

    Computes the spectral variation function (SVF) from delta-MFCCs, detects
    its peaks and valleys, filters them with `heuristics` and finally prunes
    the remaining peaks with a dynamic-window BIC test.

    :param feature: observation * features, the segments passed to BIC
    :param spec: spectrogram, only used when varin['plot'] is true
    :param varin: parameter dictionary.
                  Required keys: fs, framesize, hopsize, h2, alpha,
                  p_lambda, mode_bic, plot, feature_select, and mfcc when
                  feature_select is not 'mfcc'.
                  Optional key (default): winmax (0.35).
    :return: times of the retained boundaries, i.e. peak frame indices
             multiplied by hopsize / fs
    '''
    fs = varin['fs']
    framesize = varin['framesize']
    hopsize = varin['hopsize']

    h2 = varin['h2']                # tried values: [0.0,0.02,0.04,0.06,0.08,0.1]
    alpha = varin['alpha']          # tried values: [0.2,0.4,0.6,0.8,1.0]
    p_lambda = varin['p_lambda']    # tried values: [0.2,0.4,0.6,0.8,1.0]
    mode_bic = varin['mode_bic']
    winmax = varin.get('winmax', 0.35)
    plot = varin['plot']

    if varin['feature_select'] == 'mfcc':
        mfcc = feature
    else:
        mfcc = varin['mfcc']

    T = mfcc.shape[0]  # time
    sec_per_frame = hopsize/float(fs)

    # longest allowed left window of the BIC test, in frames
    winmax_frame = int(np.round(winmax*fs/hopsize))

    PEAK = ess.PeakDetection(interpolate=False,maxPeaks=99999)

    # single pre-formatted argument: same output on Python 2 and 3
    print('calculating delta mfcc ... ...')
    d_mfcc = Fdeltas(mfcc.transpose(), w=9)
    d_mfcc = np.transpose(d_mfcc)

    # Spectral variation function, normalised to [0, 1]
    SVF = np.sqrt(np.sum(d_mfcc**2.0,axis=1))
    SVF = (SVF - np.min(SVF))/(np.max(SVF)-np.min(SVF))

    # peaks and valleys (positions are returned normalised to [0, 1])
    p_SVF,a_SVF = PEAK(np.array(SVF,dtype=np.float32))
    p_SVF = np.array(np.round(p_SVF*(T-1)),dtype=int)

    p_v_SVF,a_v_SVF = PEAK(np.array(1-SVF,dtype=np.float32))
    p_v_SVF = np.array(np.round(p_v_SVF*(T-1)),dtype=int)

    # heuristics
    p_SVF,a_SVF,p_v_SVF,a_v_SVF = heuristics(p_SVF,a_SVF,p_v_SVF,a_v_SVF,SVF,fs,hopsize,h2,alpha)

    index2Delete = []
    if len(p_SVF) > 3:
        # dynamic windowing BIC: ii is the peak under test, jj is how many
        # peaks back the left window starts (grows while peaks are rejected)
        ii = 1
        jj = 1
        while ii < len(p_SVF)-1:
            p_0 = p_SVF[ii-jj]
            p_1 = p_SVF[ii]
            p_2 = p_SVF[ii+1]
            # cap the left window at winmax_frame frames
            if p_1-p_0 > winmax_frame:
                p_0 = p_1 - winmax_frame

            delta_BIC = BIC(feature[p_0:p_1,:],feature[p_1:p_2,:],feature[p_0:p_2,:],p_lambda,mode=mode_bic,shrinkage=2)

            if delta_BIC > 0:
                jj = 1
            else:
                jj += 1
                index2Delete.append(ii)
            ii += 1

    p_BIC = np.delete(p_SVF,index2Delete)
    a_BIC = np.delete(a_SVF,index2Delete)
    timestamps_p_BIC = p_BIC * sec_per_frame

    # plot
    if plot:
        N = 2 * framesize
        mX = np.transpose(spec)
        maxplotfreq = 6001.0
        eps = np.finfo(float).eps
        mXPlot = mX[:int(N*(maxplotfreq/fs))+1,:]
        binFreqs = np.arange(mXPlot.shape[0])*fs/float(N)
        timestamps_spec = np.arange(mXPlot.shape[1]) * sec_per_frame

        timestamps = np.arange(T) * sec_per_frame
        timestamps_p_SVF = p_SVF * sec_per_frame

        f, axarr = plt.subplots(3, sharex=True)
        axarr[0].plot(timestamps, SVF)
        axarr[0].stem(timestamps_p_SVF, a_SVF)
        axarr[0].set_ylabel('SVF')
        axarr[0].set_title('boundary heuristics')

        axarr[1].plot(timestamps, SVF)
        axarr[1].stem(timestamps_p_BIC, a_BIC)
        axarr[1].set_title('boundary DISTBIC')

        axarr[2].pcolormesh(timestamps_spec, binFreqs, 20*np.log10(mXPlot+eps))
        axarr[2].set_title('spectrogram')
        for boundary_time in timestamps_p_BIC:
            axarr[2].axvline(boundary_time)

        plt.show()

    return timestamps_p_BIC
polyphony = True three_chords = True profile = 'krumhansl' # diatonic, krumhansl, temperley, weichai, tonictriad, temperley2005, thpcp, faraldo # INSTANTIATE ALGORITHMS #======================= loader = estd.MonoLoader( filename=audiofile sampleRate=sample_rate) window = estd.Windowing( size=window_size) rfft = estd.Spectrum( size=window_size) peaks = estd.PeakDetection( interpolate=interpolate, threshold=threshold, minPosition=(min_frequency/nyquist), maxPosition=(max_frequency/nyquist), maxPeaks=maxPeaks) hpcp = estd.HPCP( bandPreset=band_preset, harmonics = harmonics, minFrequency=min_frequency, maxFrequency=max_frequency, nonLinear=non_linear, normalized=normalize, sampleRate=sample_rate, referenceFrequency=reference_frequency, weightType=weight_type, windowSize=weight_window_size) key = estd.Key( numHarmonics=harmonics_key,
def mainFunction(filename,fs,framesize,hopsize,h2,alpha,p_lambda):
    '''
    main procedure of algorithm

    Loads the audio, computes frame-wise MFCCs and their deltas, builds the
    spectral variation function (SVF), detects and filters its peaks with
    `heuristics` and prunes them with a dynamic-window ABF2 test.

    :param filename: audio file to load
    :param fs: sample rate used to load the audio
    :param framesize: analysis frame size in samples
    :param hopsize: hop size in samples
    :param h2: forwarded to `heuristics`
    :param alpha: forwarded to `heuristics`
    :param p_lambda: forwarded to `ABF2`
    :return: None.
             NOTE(review): the retained peaks (p_ABF2, a_ABF2) are computed
             but not returned in the code visible here; the function may be
             truncated -- confirm against the full file.
    '''
    # load audio
    audio = ess.MonoLoader(filename = filename, sampleRate = fs)()

    # spectrogram init
    winAnalysis = 'hann'
    N = 2 * framesize  # padding 1 time framesize
    SPECTRUM = ess.Spectrum(size=N)
    WINDOW = ess.Windowing(type=winAnalysis, zeroPadding=N-framesize)
    # upper band limit: Nyquist, capped at 11 kHz
    highFrequencyBound = fs/2 if fs/2<11000 else 11000
    MFCC = ess.MFCC(sampleRate=fs,highFrequencyBound=highFrequencyBound)
    PEAK = ess.PeakDetection(interpolate=False,maxPeaks=99999)

    mfcc = []
    mX = []

    # single pre-formatted argument: same output on Python 2 and 3
    print('calculating MFCC ... ...')

    for frame in ess.FrameGenerator(audio, frameSize=framesize, hopSize=hopsize):
        mXFrame = SPECTRUM(WINDOW(frame))
        mX.append(mXFrame)
        bands,mfccFrame = MFCC(mXFrame)
        # drop the first coefficient
        mfcc.append(mfccFrame[1:])

    mX = np.transpose(np.array(mX))
    mfcc = np.array(mfcc)

    T = mfcc.shape[0]  # time

    print('calculating delta mfcc ... ...')
    d_mfcc = Fdeltas(mfcc.transpose(), w=9)
    d_mfcc = np.transpose(d_mfcc)

    # Spectral variation function, normalised to [0, 1]
    SVF = np.sqrt(np.sum(d_mfcc**2.0,axis=1))
    SVF = (SVF - np.min(SVF))/(np.max(SVF)-np.min(SVF))

    # peaks and valleys (positions are returned normalised to [0, 1])
    p_SVF,a_SVF = PEAK(np.array(SVF,dtype=np.float32))
    p_SVF = np.array(np.round(p_SVF*(T-1)),dtype=int)

    p_v_SVF,a_v_SVF = PEAK(np.array(1-SVF,dtype=np.float32))
    p_v_SVF = np.array(np.round(p_v_SVF*(T-1)),dtype=int)

    # heuristics
    p_SVF,a_SVF,p_v_SVF,a_v_SVF = heuristics(p_SVF,a_SVF,p_v_SVF,a_v_SVF,SVF,fs,hopsize,h2,alpha)

    index2Delete = []
    if len(p_SVF) > 3:
        # dynamic windowing: ii is the peak under test, jj is how many peaks
        # back the left window starts (grows while peaks are rejected)
        ii = 1
        jj = 1
        while ii < len(p_SVF)-1:
            p_0 = p_SVF[ii-jj]
            p_1 = p_SVF[ii]
            p_2 = p_SVF[ii+1]

            delta_ABF2 = ABF2(d_mfcc[p_0:p_1,:],d_mfcc[p_1:p_2,:],d_mfcc[p_0:p_2,:],p_lambda)

            if delta_ABF2 > 0:
                jj = 1
            else:
                jj += 1
                index2Delete.append(ii)
            ii += 1

    p_ABF2 = np.delete(p_SVF,index2Delete)
    a_ABF2 = np.delete(a_SVF,index2Delete)
import numpy as np from sklearn.cluster import KMeans from sklearn import preprocessing from sklearn.externals import joblib from sklearn.cross_validation import train_test_split, StratifiedKFold from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.svm import LinearSVC from sklearn.svm import SVC from vuvAlgos import featureVUV, consonantInterval from scipy.spatial.distance import pdist, squareform import matplotlib.pyplot as plt import essentia.standard as ess PEAK = ess.PeakDetection(maxPeaks=10) def boundaryFrame(phoSeg, varin): # boundary frame from phoneme segmentation, [[pho_start,pho_end],[pho_s,pho_e],...] boundary = [0.0] start_time_syllable = phoSeg[0][0] if len(phoSeg) > 1: for ii in range(len(phoSeg) - 1): end_frame = int( round((phoSeg[ii][1] - start_time_syllable) * varin['fs'] / varin['hopsize'])) boundary.append(end_frame) end_frame_syllable = int( round((phoSeg[-1][1] - start_time_syllable) * varin['fs'] /
def vibrato(pitch):
    '''
    Detect vibrato sections in a pitch contour.

    The contour is split into voiced segments (runs of non-zero pitch).
    Each segment is analysed with half-overlapping frames: a frame counts
    as vibrato when the strongest peak of its spectrum lies within
    [minFreq, maxFreq] Hz, no second/third peak lies in that range and the
    strongest peak dominates them by dBLobe / dBSecondLobe dB. Detected
    frames are post-processed by vibratoSectionFilter.

    :param pitch: sequence of pitch values, one per frame; negative values
                  are treated as unvoiced (0). The frame rate is assumed to
                  be 44100 / 256 frames per second.
    :return: (vibB, vibBExt, vibBFreq, vibBFreqMin, vibBFreqMax) where vibB
             is a flat list of start/end frame indices (two per section)
             and the other lists hold, per section, the per-frame extent,
             vibrato frequency, minimum and maximum pitch. All lists are
             empty for an empty input.
    '''
    # true division: Python 2 integer division would truncate this to 172
    sampleRate = 44100 / 256.0
    frameSize = int(round(0.5*sampleRate))
    frameSize = frameSize if len(pitch)>=frameSize else len(pitch)  # dynamic frameSize
    halfFrame = frameSize // 2
    hop = max(1, halfFrame)  # a zero step would make range() raise
    fftSize = 4*frameSize
    dBLobe = 15
    dBSecondLobe = 20
    minFreq = 2.0
    maxFreq = 8.0
    winAnalysis = 'hann'

    # INIT: clamp negative (unvoiced) values to 0
    f0 = [0 if f < 0 else f for f in pitch]
    if not f0:
        return [], [], [], [], []

    # GET CONTOUR SEGMENTS
    startC = []
    endC = []
    if f0[0]>0:
        startC.append(0)
    for ii in range(len(f0)-1):
        if abs(f0[ii+1])>0 and f0[ii]==0:
            startC.append(ii+1)
        if f0[ii+1]==0 and abs(f0[ii])>0:
            endC.append(ii)
    if len(endC)<len(startC):
        endC.append(len(f0))

    WINDOW = ess.Windowing(type=winAnalysis, size = frameSize, zeroPadding=fftSize-frameSize)
    SPECTRUM = ess.Spectrum(size = fftSize)
    PEAK = ess.PeakDetection(maxPeaks = 3, orderBy = 'amplitude')

    # vibrato annotations, one value per frame of f0
    vibSec = [0]*len(f0)
    vibRate = [0]*len(f0)
    vibFreqMin = [0]*len(f0)
    vibFreqMax = [0]*len(f0)
    vibExt = [0]*len(f0)

    vibBRaw = []
    vibFreqMinRaw = []
    vibFreqMaxRaw = []

    # ANALYSE EACH SEGMENT
    for segStart, segEnd in zip(startC, endC):
        # the contour is used as is (it's already in midi)
        contour = f0[segStart:segEnd]

        # frame-wise FFT
        for jj in range(0, len(contour)-frameSize, hop):
            frame = contour[jj:jj+frameSize]
            minFrame = min(frame)
            maxFrame = max(frame)
            extend = maxFrame-minFrame

            # explicit float32 array for the Essentia algorithms
            frame = np.asarray(frame, dtype=np.float32)
            frame = frame-np.mean(frame)
            # instantiated per frame as in the original
            frame = ess.MovingAverage(size=5)(frame)
            frame = WINDOW(frame)

            S = SPECTRUM(frame)
            locs, amp = PEAK(S)
            # positions are normalised to [0, 1] over the spectrum, whose
            # last bin is at sampleRate / 2
            freqs = locs*(sampleRate/2.0)

            if len(freqs)<=0:
                continue

            if freqs[0]<minFreq or freqs[0]>maxFreq:  # strongest peak is not in considered range
                continue

            if len(freqs)>1:  # there is a second peak
                if freqs[1]>minFreq and freqs[1]<maxFreq:
                    continue
                if 20*np.log10(amp[0]/amp[1])<dBLobe:
                    continue

            if len(freqs)>2:  # there is a third peak
                if freqs[2]>minFreq and freqs[2]<maxFreq:  # it is also in the vibrato range
                    continue
                if 20*np.log10(amp[0]/amp[2])<dBSecondLobe:
                    continue

            lo = segStart+jj
            hi = lo+frameSize
            vibSec[lo:hi] = f0[lo:hi]
            vibExt[lo:hi] = [extend]*frameSize
            vibRate[lo:hi] = [freqs[0]]*frameSize
            vibFreqMin[lo:hi] = [minFrame]*frameSize
            vibFreqMax[lo:hi] = [maxFrame]*frameSize

            vibBRaw.append((lo, hi))
            vibFreqMinRaw.append(minFrame)
            vibFreqMaxRaw.append(maxFrame)

    # section filter
    toRemove = vibratoSectionFilter(vibBRaw, vibFreqMinRaw, vibFreqMaxRaw)

    for tr in toRemove:
        start = vibBRaw[tr][0]
        end = vibBRaw[tr][1]-halfFrame
        # same length as the slice, so the annotation lists never change
        # size (the original used frameSize/2, which differs for odd sizes)
        zeros = [0]*(end-start)
        vibSec[start:end] = zeros
        vibExt[start:end] = zeros
        vibRate[start:end] = zeros
        vibFreqMin[start:end] = zeros
        vibFreqMax[start:end] = zeros

    # write vibrato sections boundary
    vibB = []
    vibBExt = []
    vibBFreq = []
    vibBFreqMin = []
    vibBFreqMax = []

    if vibSec[0]>0:
        vibB.append(0)
    for ii in range(len(f0)-1):
        if abs(vibSec[ii+1])>0 and vibSec[ii]==0:
            vibB.append(ii+1)
        if vibSec[ii+1]==0 and vibSec[ii]>0:
            vibB.append(ii)
    # close a section that runs to the very end (the original appended this
    # to endC, leaving vibB with an odd number of boundaries)
    if vibSec[-1]>0:
        vibB.append(len(f0)-1)

    assert (len(vibB)%2==0), "vib boundary should be even!"

    for ii in range(len(vibB)//2):
        first = vibB[2*ii]
        last = vibB[2*ii+1]+1
        vibBExt.append(vibExt[first:last])
        vibBFreq.append(vibRate[first:last])
        vibBFreqMin.append(vibFreqMin[first:last])
        vibBFreqMax.append(vibFreqMax[first:last])

    return vibB, vibBExt, vibBFreq, vibBFreqMin, vibBFreqMax