def getFeaturesFromWave(self, fname):
    '''
    Build a per-frame feature matrix from a WAV file.

    The file is read with scipy, multi-channel audio is averaged down to a
    single channel, the signal is passed through rs.silenceRemoval /
    rs.nonsilentRegions, and the short-term features of the result are
    concatenated with the LPC features.

    ARGUMENTS:
        - fname:        path of the WAV file to read
    RETURNS:
        - featuresT:    the transposed short-term feature matrix with the
                        output of LPC.extract appended along axis 1
                        (assumes both have the same number of rows --
                        TODO confirm against LPC.extract)
    '''
    fs, signal = scipy.io.wavfile.read(fname)
    window_len = self.frame_size * fs      # Number of samples in frame_size
    sample_shift = self.frame_shift * fs   # Number of samples shifted

    # Down-mix multi-channel audio. The dimensionality is tested explicitly
    # instead of relying on a bare ``except`` around ``signal.shape[1]``, so
    # that genuine errors raised while averaging are no longer swallowed and
    # misreported as "single column".
    if signal.ndim == 1:
        print("single column")
    elif signal.shape[1]:
        signal = numpy.mean(signal, axis=1)

    segmentLimits = rs.silenceRemoval(signal, fs, self.frame_size,
                                      self.frame_shift)
    segmentLimits = numpy.asarray(segmentLimits)
    data = rs.nonsilentRegions(segmentLimits, fs, signal)

    stfeatures = featureExtraction.stFeatureExtraction(data, fs, window_len,
                                                       sample_shift)
    lpc = LPC.extract((fs, data))

    # Transpose before the column-wise concatenation with the LPC output.
    featuresT = stfeatures.transpose()
    featuresT = numpy.concatenate((featuresT, lpc), axis=1)
    return featuresT
def getFeaturesFromWave(self, fname):
    '''
    Build a per-frame feature matrix from a WAV file.

    The file is read with scipy, multi-channel audio is averaged down to a
    single channel, the signal is passed through rs.silenceRemoval /
    rs.nonsilentRegions, and the short-term features of the result are
    concatenated with the LPC features.

    ARGUMENTS:
        - fname:        path of the WAV file to read
    RETURNS:
        - featuresT:    the transposed short-term feature matrix with the
                        output of LPC.extract appended along axis 1
                        (assumes both have the same number of rows --
                        TODO confirm against LPC.extract)
    '''
    fs, signal = scipy.io.wavfile.read(fname)
    window_len = self.frame_size * fs      # Number of samples in frame_size
    sample_shift = self.frame_shift * fs   # Number of samples shifted

    # Down-mix multi-channel audio. The dimensionality is tested explicitly
    # instead of relying on a bare ``except`` around ``signal.shape[1]``, so
    # that genuine errors raised while averaging are no longer swallowed and
    # misreported as "single column".
    if signal.ndim == 1:
        print("single column")
    elif signal.shape[1]:
        signal = numpy.mean(signal, axis=1)

    segmentLimits = rs.silenceRemoval(signal, fs, self.frame_size,
                                      self.frame_shift)
    segmentLimits = numpy.asarray(segmentLimits)
    data = rs.nonsilentRegions(segmentLimits, fs, signal)

    stfeatures = featureExtraction.stFeatureExtraction(data, fs, window_len,
                                                       sample_shift)
    lpc = LPC.extract((fs, data))

    # Transpose before the column-wise concatenation with the LPC output.
    featuresT = stfeatures.transpose()
    featuresT = numpy.concatenate((featuresT, lpc), axis=1)
    return featuresT
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - Fs:               sampling freq
         - stWin, stStep:    window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - Weight:           (optional) weight factor (0 < Weight < 1)
                             the higher, the more strict; values outside
                             the open interval are clamped to 0.01 / 0.99
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - segmentLimits:    list of segment limits in seconds (e.g
                             [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds and
                             (1.4, 3.0) seconds). Segments that are not
                             longer than 0.2 seconds are discarded.
    '''
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    ShortTermFeatures = fe.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs)

    # Step 2: train binary SVM classifier of low vs high energy frames
    EnergySt = ShortTermFeatures[1, :]   # energy sequence (2nd feature row)
    E = numpy.sort(EnergySt)             # sorted energy values
    L1 = int(len(E) / 10)                # 10% of the short-term windows
    T1 = numpy.mean(E[0:L1])             # "lower" 10% energy threshold
    # NOTE(review): this slice stops at -1 and therefore leaves the largest
    # energy value out of the "higher" 10% mean; kept as-is because changing
    # it would alter the SVM training set -- confirm whether it is intended.
    T2 = numpy.mean(E[-L1:-1])           # "higher" 10% energy threshold
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt < T1)[0]]   # low energy frames
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt > T2)[0]]   # high energy frames
    featuresSS = [Class1.T, Class2.T]    # the binary classification task
    [featuresNormSS, MEANSS, STDSS] = normalizeFeatures(featuresSS)
    SVM = trainSVM(featuresNormSS, 1.0)  # probabilistic model (ONSET vs SILENCE)

    # Step 3: compute onset probability based on the trained SVM
    ProbOnset = []
    for i in range(ShortTermFeatures.shape[1]):                 # for each frame
        curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS      # normalize feature vector
        ProbOnset.append(SVM.pred_probability(curFV)[1])        # probability of the ONSET class
    ProbOnset = numpy.array(ProbOnset)
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)   # smooth probability

    # Step 4A: detect onset frame indices. The threshold is a weighted
    # average of the lower 10% and the top 10% of the probability values.
    ProbOnsetSorted = numpy.sort(ProbOnset)
    # Floor division keeps Nt an integer on Python 3 as well, where "/"
    # would yield a float that cannot be used as a slice bound.
    Nt = ProbOnsetSorted.shape[0] // 10
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) +
         Weight * numpy.mean(ProbOnsetSorted[-Nt::]))
    MaxIdx = numpy.where(ProbOnset > T)[0]   # frames that satisfy the thresholding

    # Step 4B: group frame indices to onset segments. Indices that are at
    # most 2 frames apart are put in the same cluster.
    i = 0
    segmentLimits = []
    while i < len(MaxIdx):                   # for each of the detected onset indices
        curCluster = [MaxIdx[i]]
        if i == len(MaxIdx) - 1:
            # A trailing single-frame cluster has zero duration and could
            # never pass the minimum-duration filter below, so it is dropped.
            break
        while MaxIdx[i + 1] - curCluster[-1] <= 2:
            curCluster.append(MaxIdx[i + 1])
            i += 1
            if i == len(MaxIdx) - 1:
                break
        i += 1
        segmentLimits.append([curCluster[0] * stStep, curCluster[-1] * stStep])

    # Step 5: Post process: remove very small segments
    minDuration = 0.2
    segmentLimits = [s for s in segmentLimits if s[1] - s[0] > minDuration]

    if plot:
        # Time axes are built from integer ranges and then scaled, which
        # guarantees one time stamp per sample; numpy.arange with a float
        # step may return one element more or fewer than expected.
        timeX = numpy.arange(x.shape[0]) / float(Fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        # Title the waveform panel while it is the active subplot.
        plt.title('Signal')

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(ProbOnset.shape[0]) * stStep, ProbOnset)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('SVM Probability')
        plt.show()

    return segmentLimits
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - Fs:               sampling freq
         - stWin, stStep:    window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - Weight:           (optional) weight factor (0 < Weight < 1)
                             the higher, the more strict; values outside
                             the open interval are clamped to 0.01 / 0.99
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - segmentLimits:    list of segment limits in seconds (e.g
                             [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds and
                             (1.4, 3.0) seconds). Segments that are not
                             longer than 0.2 seconds are discarded.
    '''
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    ShortTermFeatures = fe.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs)

    # Step 2: train binary SVM classifier of low vs high energy frames
    EnergySt = ShortTermFeatures[1, :]   # energy sequence (2nd feature row)
    E = numpy.sort(EnergySt)             # sorted energy values
    L1 = int(len(E) / 10)                # 10% of the short-term windows
    T1 = numpy.mean(E[0:L1])             # "lower" 10% energy threshold
    # NOTE(review): this slice stops at -1 and therefore leaves the largest
    # energy value out of the "higher" 10% mean; kept as-is because changing
    # it would alter the SVM training set -- confirm whether it is intended.
    T2 = numpy.mean(E[-L1:-1])           # "higher" 10% energy threshold
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt < T1)[0]]   # low energy frames
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt > T2)[0]]   # high energy frames
    featuresSS = [Class1.T, Class2.T]    # the binary classification task
    [featuresNormSS, MEANSS, STDSS] = normalizeFeatures(featuresSS)
    SVM = trainSVM(featuresNormSS, 1.0)  # probabilistic model (ONSET vs SILENCE)

    # Step 3: compute onset probability based on the trained SVM
    ProbOnset = []
    for i in range(ShortTermFeatures.shape[1]):                 # for each frame
        curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS      # normalize feature vector
        ProbOnset.append(SVM.pred_probability(curFV)[1])        # probability of the ONSET class
    ProbOnset = numpy.array(ProbOnset)
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)   # smooth probability

    # Step 4A: detect onset frame indices. The threshold is a weighted
    # average of the lower 10% and the top 10% of the probability values.
    ProbOnsetSorted = numpy.sort(ProbOnset)
    # Floor division keeps Nt an integer on Python 3 as well, where "/"
    # would yield a float that cannot be used as a slice bound.
    Nt = ProbOnsetSorted.shape[0] // 10
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) +
         Weight * numpy.mean(ProbOnsetSorted[-Nt::]))
    MaxIdx = numpy.where(ProbOnset > T)[0]   # frames that satisfy the thresholding

    # Step 4B: group frame indices to onset segments. Indices that are at
    # most 2 frames apart are put in the same cluster.
    i = 0
    segmentLimits = []
    while i < len(MaxIdx):                   # for each of the detected onset indices
        curCluster = [MaxIdx[i]]
        if i == len(MaxIdx) - 1:
            # A trailing single-frame cluster has zero duration and could
            # never pass the minimum-duration filter below, so it is dropped.
            break
        while MaxIdx[i + 1] - curCluster[-1] <= 2:
            curCluster.append(MaxIdx[i + 1])
            i += 1
            if i == len(MaxIdx) - 1:
                break
        i += 1
        segmentLimits.append([curCluster[0] * stStep, curCluster[-1] * stStep])

    # Step 5: Post process: remove very small segments
    minDuration = 0.2
    segmentLimits = [s for s in segmentLimits if s[1] - s[0] > minDuration]

    if plot:
        # Time axes are built from integer ranges and then scaled, which
        # guarantees one time stamp per sample; numpy.arange with a float
        # step may return one element more or fewer than expected.
        timeX = numpy.arange(x.shape[0]) / float(Fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        # Title the waveform panel while it is the active subplot.
        plt.title('Signal')

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(ProbOnset.shape[0]) * stStep, ProbOnset)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('SVM Probability')
        plt.show()

    return segmentLimits