Esempio n. 1
0
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
    '''
    Event Detection (silence removal)

    A binary SVM is trained on the lowest-energy vs. the highest-energy
    short-term frames of the signal itself. Every frame is then given an
    onset probability, the probability sequence is smoothed and thresholded,
    and neighbouring frames above the threshold are merged into segments.

    ARGUMENTS:
         - x:               the input audio signal
         - Fs:              sampling freq
         - stWin, stStep:   window size and step in seconds
         - smoothWindow:    (optional) smooth window (in seconds)
         - Weight:          (optional) weight factor (0 < Weight < 1),
                            the higher, the more strict
         - plot:            (optional) True if results are to be plotted
    RETURNS:
         - segmentLimits:   list of segment limits in seconds
                            (e.g [[0.1, 0.9], [1.4, 3.0]] means that the
                            resulting segments are (0.1 - 0.9) seconds and
                            (1.4, 3.0) seconds
    '''

    # Keep the weight strictly inside (0, 1).
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)             # convert to mono
    ShortTermFeatures = aF.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs)

    # Step 2: train binary SVM classifier of low vs high energy frames
    EnergySt = ShortTermFeatures[1, :]          # energy sequence (2nd feature row)
    E = numpy.sort(EnergySt)                    # sorted energy values
    L1 = int(len(E) / 10)                       # 10% of the number of frames
    T1 = numpy.mean(E[0:L1])                    # "lower" 10% energy threshold
    # NOTE: the slice stops at -1, so the single largest energy value is not
    # part of this mean. Kept as-is to preserve the existing numerical results.
    T2 = numpy.mean(E[-L1:-1])                  # "higher" 10% energy threshold
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt < T1)[0]]   # low-energy frames
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt > T2)[0]]   # high-energy frames
    featuresSS = [Class1.T, Class2.T]           # the binary classification task
    [featuresNormSS, MEANSS, STDSS] = aT.normalizeFeatures(featuresSS)
    SVM = aT.trainSVM(featuresNormSS, 1.0)      # ONSET vs SILENCE model

    # Step 3: compute onset probability based on the trained SVM
    # (each column of ShortTermFeatures is one frame; it is normalized with
    # the training statistics before being scored)
    ProbOnset = numpy.array([
        SVM.pred_probability((frame - MEANSS) / STDSS)[1]
        for frame in ShortTermFeatures.T
    ])
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)

    # Step 4A: detect onset frame indices.
    # The threshold is a weighted average of the means of the lowest 10% and
    # the highest 10% of the probabilities. Floor division keeps Nt an integer
    # so it is a valid slice bound under Python 3 as well.
    ProbOnsetSorted = numpy.sort(ProbOnset)
    Nt = ProbOnsetSorted.shape[0] // 10
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) +
         Weight * numpy.mean(ProbOnsetSorted[-Nt:]))
    MaxIdx = numpy.where(ProbOnset > T)[0]      # frames above the threshold

    # Step 4B: group frame indices to onset segments.
    # Indices at most 2 frames apart belong to the same segment.
    candidateLimits = []
    if len(MaxIdx) > 0:
        first = last = MaxIdx[0]
        for idx in MaxIdx[1:]:
            if idx - last > 2:
                candidateLimits.append([first * stStep, last * stStep])
                first = idx
            last = idx
        candidateLimits.append([first * stStep, last * stStep])

    # Step 5: Post process: remove very small segments
    # (this also discards segments made of a single frame, whose duration is 0)
    minDuration = 0.2
    segmentLimits = [s for s in candidateLimits if s[1] - s[0] > minDuration]

    if plot:
        # One time stamp per sample / per frame; built from integer indices so
        # the axis length always matches the plotted data.
        timeX = numpy.arange(x.shape[0]) / float(Fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        plt.title('Signal')
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(ProbOnset.shape[0]) * stStep, ProbOnset)
        plt.title('SVM Probability')
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.show()

    return segmentLimits
def silenceRemoval(x,
                   fs,
                   st_win,
                   st_step,
                   smoothWindow=0.5,
                   weight=0.5,
                   plot=False):
    '''
    Event Detection (silence removal)

    A binary SVM is trained on the lowest-energy vs. the highest-energy
    short-term frames of the signal itself. Every frame is then given an
    onset probability, the probability sequence is smoothed and thresholded,
    and neighbouring frames above the threshold are merged into segments.

    ARGUMENTS:
         - x:                the input audio signal
         - fs:               sampling freq
         - st_win, st_step:  window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - weight:           (optional) weight factor (0 < weight < 1),
                             the higher, the more strict
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - seg_limits:       list of segment limits in seconds
                             (e.g [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds and
                             (1.4, 3.0) seconds
    '''

    # Keep the weight strictly inside (0, 1).
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = numpy.sort(st_energy)
    # number of 10% of the total short-term windows
    l1 = int(len(en) / 10)
    # Small offset added to both thresholds (presumably so that frames with
    # exactly zero energy still satisfy the <= / >= comparisons below).
    eps = 1e-15
    # compute "lower" 10% energy threshold
    t1 = numpy.mean(en[0:l1]) + eps
    # compute "higher" 10% energy threshold
    # NOTE: the slice stops at -1, so the single largest energy value is not
    # part of this mean. Kept as-is to preserve the existing numerical results.
    t2 = numpy.mean(en[-l1:-1]) + eps
    # get all features that correspond to low energy
    class1 = st_feats[:, numpy.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy
    class2 = st_feats[:, numpy.where(st_energy >= t2)[0]]
    # form the binary classification task, normalize and train the
    # respective svm probabilistic model (ONSET vs SILENCE)
    feats_s = [class1.T, class2.T]
    [feats_s_norm, means_s, stds_s] = aT.normalizeFeatures(feats_s)
    svm = aT.trainSVM(feats_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    # (each column of st_feats is one frame; it is normalized with the
    # training statistics and scored as a single-row sample)
    prob_on_set = numpy.array([
        svm.predict_proba(((frame - means_s) / stds_s).reshape(1, -1))[0][1]
        for frame in st_feats.T
    ])
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices:
    # find probability threshold as a weighted average
    # of top 10% and lower 10% of the values
    prob_on_set_sort = numpy.sort(prob_on_set)
    nt = int(prob_on_set_sort.shape[0] / 10)
    threshold = (numpy.mean((1 - weight) * prob_on_set_sort[0:nt]) +
                 weight * numpy.mean(prob_on_set_sort[-nt:]))

    # get the indices of the frames that satisfy the thresholding
    max_idx = numpy.where(prob_on_set > threshold)[0]

    # Step 4B: group frame indices to onset segments.
    # Indices at most 2 frames apart belong to the same segment.
    candidates = []
    if len(max_idx) > 0:
        first = last = max_idx[0]
        for idx in max_idx[1:]:
            if idx - last > 2:
                candidates.append([first * st_step, last * st_step])
                first = idx
            last = idx
        candidates.append([first * st_step, last * st_step])

    # Step 5: Post process: remove very small segments
    # (this also discards segments made of a single frame, whose duration is 0)
    min_dur = 0.2
    seg_limits = [s for s in candidates if s[1] - s[0] > min_dur]

    if plot:
        # One time stamp per sample / per frame; built from integer indices so
        # the axis length always matches the plotted data.
        time_x = numpy.arange(x.shape[0]) / float(fs)

        plt.subplot(2, 1, 1)
        plt.plot(time_x, x)
        plt.title('Signal')
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(prob_on_set.shape[0]) * st_step, prob_on_set)
        plt.title('svm Probability')
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.show()

    return seg_limits