Exemple #1
0
    def extractFeatures(self, eventsList, Fs, snr):
        """Build one summary feature row per event.

        For every event, short-term features are extracted from the event's
        audio data, the first ``self.discard`` feature rows are kept, and each
        kept feature is summarised over its frames by the median and the
        median absolute deviation (MAD).

        Args:
            eventsList: iterable of event objects exposing ``getData()``,
                ``getTarget()``, ``getId()`` and ``getBackground()``.
            Fs: sampling frequency; also scales ``self.frameSize`` and
                ``self.frameStep`` before they are handed to the extractor.
            snr: value copied unchanged into every row.

        Returns:
            A list holding one list per event, laid out as
            ``[median_0, mad_0, median_1, mad_1, ..., target, n_frames, snr,
            id, background]``.
        """
        rows = []
        for event in eventsList:
            short_term = audioFeatureExtraction.stFeatureExtraction(
                event.getData(), Fs, self.frameSize * Fs, self.frameStep * Fs)
            # One row per retained feature, one column per frame.
            kept = short_term[:self.discard, :]

            row = []
            for series in kept:
                centre = np.median(series)
                spread = np.median(np.abs(series - centre))
                row += [centre, spread]

            row += [
                event.getTarget(),      # class label
                kept.shape[1],          # number of frames in this signal
                snr,                    # signal-to-noise value given by the caller
                event.getId(),          # event id
                event.getBackground(),  # background type
            ]
            rows.append(row)

        return rows
def beatExtractionWrapper(wavFileName, plot):
    """Estimate the beat rate of an audio file and print it.

    The file is read, short-term features are extracted with a 50 ms window
    and a 50 ms step, and the result is handed to ``aF.beatExtraction``.

    Args:
        wavFileName: path of the audio file to analyse.
        plot: forwarded unchanged as the third argument of
            ``aF.beatExtraction``.

    Raises:
        Exception: if ``wavFileName`` is not an existing file.
    """
    if not os.path.isfile(wavFileName):
        raise Exception("Input audio file not found!")
    [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
    # Window and step are both 0.050 s, expressed in samples.
    F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
    BPM, ratio = aF.beatExtraction(F, 0.050, plot)
    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, unlike the print statement, which is Python 2 only.
    print("Beat: {0:d} bpm ".format(int(BPM)))
    print("Ratio: {0:.2f} ".format(ratio))
def beatExtractionWrapper(wavFileName, plot):
    """Read an audio file, run beat extraction on it and print the outcome.

    Short-term features are computed over 50 ms windows taken every 50 ms and
    passed to ``aF.beatExtraction``; the two values it returns are printed as
    the beat rate (truncated to an integer) and a ratio.

    Args:
        wavFileName: path of the audio file to analyse.
        plot: forwarded unchanged to ``aF.beatExtraction``.

    Raises:
        Exception: if ``wavFileName`` does not name an existing file.
    """
    if not os.path.isfile(wavFileName):
        raise Exception("Input audio file not found!")
    [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
    F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
    # The 0.050 passed here matches the feature step used above (seconds).
    BPM, ratio = aF.beatExtraction(F, 0.050, plot)
    # Function-call form of print: valid on both Python 2 and Python 3.
    print("Beat: {0:d} bpm ".format(int(BPM)))
    print("Ratio: {0:.2f} ".format(ratio))
Exemple #4
0
def beatExtractionWrapper(wav_file, plot):
    """Print the beat rate and ratio estimated for an audio file.

    Args:
        wav_file: path of the audio file to analyse.
        plot: forwarded unchanged to ``aF.beatExtraction``.

    Raises:
        Exception: if ``wav_file`` is not an existing file.
    """
    if os.path.isfile(wav_file):
        sampling_rate, signal = audioBasicIO.readAudioFile(wav_file)
        # 50 ms window and 50 ms step, both expressed in samples.
        features, _ = aF.stFeatureExtraction(signal, sampling_rate,
                                             0.050 * sampling_rate,
                                             0.050 * sampling_rate)
        tempo, tempo_ratio = aF.beatExtraction(features, 0.050, plot)
        beat_line = "Beat: {0:d} bpm ".format(int(tempo))
        print(beat_line)
        ratio_line = "Ratio: {0:.2f} ".format(tempo_ratio)
        print(ratio_line)
    else:
        raise Exception("Input audio file not found!")
Exemple #5
0
def musicThumbnailing(x, Fs, shortTermSize=1.0, shortTermStep=0.5, thumbnailSize=10.0):
    '''
    This function detects instances of the most representative part of a music recording, also called "music thumbnails".
    A technique similar to the one proposed in [1] is followed, however a wider set of audio features is used instead of
    chroma features. In particular the following steps are followed:
     - Extract short-term audio features. Typical short-term window size: 1 second
     - Compute the self-similarity matrix, i.e. all pairwise similarities between feature vectors
     - Apply a diagonal mask as a moving average filter on the values of the self-similarity matrix.
       The size of the mask is equal to the desirable thumbnail length.
     - Find the position of the maximum value of the new (filtered) self-similarity matrix.
       The audio segments that correspond to the diagonal around that position are the selected thumbnails

    ARGUMENTS:
     - x:               input signal
     - Fs:              sampling frequency
     - shortTermSize:   window size (in seconds)
     - shortTermStep:   window step (in seconds)
     - thumbnailSize:   desired thumbnail size (in seconds)

    RETURNS (as a 5-element tuple):
     - A1:              beginning of 1st thumbnail (in seconds)
     - A2:              ending of 1st thumbnail (in seconds)
     - B1:              beginning of 2nd thumbnail (in seconds)
     - B2:              ending of 2nd thumbnail (in seconds)
     - S:               the filtered and masked self-similarity matrix

    USAGE EXAMPLE:
     import audioFeatureExtraction as aF
     [Fs, x] = basicIO.readAudioFile(inputFile)
     [A1, A2, B1, B2, S] = musicThumbnailing(x, Fs)

    [1] Bartsch, M. A., & Wakefield, G. H. (2005). Audio thumbnailing of popular music using chroma-based representations.
    Multimedia, IEEE Transactions on, 7(1), 96-104.
    '''
    x = audioBasicIO.stereo2mono(x)
    # feature extraction:
    stFeatures = aF.stFeatureExtraction(x, Fs, Fs * shortTermSize, Fs * shortTermStep)

    # self-similarity matrix
    S = selfSimilarityMatrix(stFeatures)

    # moving filter: an M x M identity kernel sums M consecutive values along
    # each diagonal of S.
    M = int(round(thumbnailSize / shortTermStep))
    B = numpy.eye(M, M)
    S = scipy.signal.convolve2d(S, B, 'valid')

    # post-processing: overwrite the band around the main diagonal and the
    # whole lower triangle with the minimum, so they can never be selected.
    MIN = numpy.min(S)
    rowIdx, colIdx = numpy.indices(S.shape)
    S[(numpy.abs(rowIdx - colIdx) < 5.0 / shortTermStep) | (rowIdx > colIdx)] = MIN

    # find max position:
    [I, J] = numpy.unravel_index(S.argmax(), S.shape)

    # expand along the diagonal through (I, J) until M steps are covered:
    i1 = I
    i2 = I
    j1 = J
    j2 = J
    lastRow = S.shape[0] - 1
    lastCol = S.shape[1] - 1

    while i2 - i1 < M:
        # Never step outside S: index -1 would silently wrap to the last
        # row/column and an index past the end would raise IndexError.
        canGoBack = i1 > 0 and j1 > 0
        canGoForward = i2 < lastRow and j2 < lastCol
        if not (canGoBack or canGoForward):
            break  # the diagonal is exhausted in both directions
        if canGoBack and (not canGoForward or S[i1 - 1, j1 - 1] > S[i2 + 1, j2 + 1]):
            i1 -= 1
            j1 -= 1
        else:
            i2 += 1
            j2 += 1

    return (shortTermStep * i1, shortTermStep * i2, shortTermStep * j1, shortTermStep * j2, S)
Exemple #6
0
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - Fs:               sampling freq
         - stWin, stStep:    window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - Weight:           (optional) weight factor (0 < Weight < 1) the higher, the more strict
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - segmentLimits:    list of segment limits in seconds (e.g [[0.1, 0.9], [1.4, 3.0]] means that
                             the resulting segments are (0.1 - 0.9) seconds and (1.4, 3.0) seconds
    '''

    # Keep the weight strictly inside (0, 1).
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)                                          # convert to mono
    ShortTermFeatures = aF.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs)  # extract short-term features

    # Step 2: train binary SVM classifier of low vs high energy frames
    EnergySt = ShortTermFeatures[1, :]       # keep only the energy short-term sequence (2nd feature)
    E = numpy.sort(EnergySt)                 # sort the energy feature values
    L1 = int(len(E) / 10)                    # number of 10% of the total short-term windows
    T1 = numpy.mean(E[0:L1])                 # compute "lower" 10% energy threshold
    # NOTE(review): the -1 stop leaves the single highest value out of the mean.
    T2 = numpy.mean(E[-L1:-1])               # compute "higher" 10% energy threshold
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt < T1)[0]]   # features of the low-energy frames
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt > T2)[0]]   # features of the high-energy frames
    featuresSS = [Class1.T, Class2.T]        # form the binary classification task and ...
    [featuresNormSS, MEANSS, STDSS] = aT.normalizeFeatures(featuresSS)   # normalize and ...
    SVM = aT.trainSVM(featuresNormSS, 1.0)   # train the respective SVM probabilistic model (ONSET vs SILENCE)

    # Step 3: compute onset probability based on the trained SVM
    ProbOnset = []
    for i in range(ShortTermFeatures.shape[1]):                  # for each frame
        curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS       # normalize feature vector
        ProbOnset.append(SVM.pred_probability(curFV)[1])         # probability of the ONSET class
    ProbOnset = numpy.array(ProbOnset)
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)  # smooth probability

    # Step 4A: detect onset frame indices.
    # The threshold is a weighted average of the lowest 10% and the top 10%
    # of the smoothed probabilities.
    ProbOnsetSorted = numpy.sort(ProbOnset)
    # Floor division: Nt is used as a slice bound and must be an int on
    # Python 3 as well as Python 2.
    Nt = ProbOnsetSorted.shape[0] // 10
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) + Weight * numpy.mean(ProbOnsetSorted[-Nt::]))

    MaxIdx = numpy.where(ProbOnset > T)[0]   # indices of the frames above the threshold

    # Step 4B: group frame indices to onset segments. Indices at most two
    # frames apart belong to the same segment.
    segmentLimits = []
    if len(MaxIdx) > 0:
        first = last = MaxIdx[0]
        for idx in MaxIdx[1:]:
            if idx - last <= 2:
                last = idx
            else:
                segmentLimits.append([first * stStep, last * stStep])
                first = last = idx
        segmentLimits.append([first * stStep, last * stStep])

    # Step 5: Post process: remove very small segments (this also discards
    # every single-frame segment, whose duration is zero).
    minDuration = 0.2
    segmentLimits = [s for s in segmentLimits if s[1] - s[0] > minDuration]

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(Fs), 1.0 / Fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        # The title has to be set while the first subplot is still current.
        plt.title('Signal')
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, ProbOnset.shape[0] * stStep, stStep), ProbOnset)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('SVM Probability')
        plt.show()

    return segmentLimits
Exemple #7
0
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
import numpy
# Script: extract short-term features from two recordings and compare the
# feature rows 9..20 of both with a correlation matrix and a distance.

# The row range used throughout the comparison.
_BAND = slice(9, 21)

print("COUNT 1\n")
Fs1, x1 = audioBasicIO.readAudioFile("data/practice.wav")
# 50 ms windows taken every 25 ms, both expressed in samples.
F1 = audioFeatureExtraction.stFeatureExtraction(x1, Fs1, 0.050 * Fs1,
                                                0.025 * Fs1)
_band1 = F1[_BAND]
# Row count and frame count of the selected band.
print(len(_band1), len(_band1[0]))

print("\n\nCOUNT 2\n")
Fs2, x2 = audioBasicIO.readAudioFile("data/practice2.wav")
F2 = audioFeatureExtraction.stFeatureExtraction(x2, Fs2, 0.050 * Fs2,
                                                0.025 * Fs2)
_band2 = F2[_BAND]
print(len(_band2), len(_band2[0]))

# Only the frames present in both recordings can be compared.
size = min(len(_band2[0]), len(_band1[0]))
print(size, "\n")

print("\n\nCORRCOEF\n")
# Correlation coefficients rescaled from [-1, 1] to [0, 1].
_correlation = numpy.corrcoef(_band1[:, 0:size], _band2[:, 0:size])
print(_correlation * 0.5 + 0.5)

print("\n\nE Distance\n")
_difference = _band1[:, 0:size] - _band2[:, 0:size]
print(numpy.linalg.norm(_difference))
def silenceRemoval(x,
                   fs,
                   st_win,
                   st_step,
                   smoothWindow=0.5,
                   weight=0.5,
                   plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - fs:               sampling freq
         - st_win, st_step:  window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - weight:           (optional) weight factor (0 < weight < 1) the higher, the more strict
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - seg_limits:       list of segment limits in seconds (e.g [[0.1, 0.9], [1.4, 3.0]] means that
                             the resulting segments are (0.1 - 0.9) seconds and (1.4, 3.0) seconds
    '''

    # Keep the weight strictly inside (0, 1).
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = numpy.sort(st_energy)
    # number of 10% of the total short-term windows
    l1 = int(len(en) / 10)
    # compute "lower" 10% energy threshold; the tiny offset keeps the
    # threshold strictly positive
    t1 = numpy.mean(en[0:l1]) + 0.000000000000001
    # compute "higher" 10% energy threshold
    # NOTE(review): the -1 stop leaves the single highest value out of the mean.
    t2 = numpy.mean(en[-l1:-1]) + 0.000000000000001
    # get all features that correspond to low energy
    class1 = st_feats[:, numpy.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy
    class2 = st_feats[:, numpy.where(st_energy >= t2)[0]]
    # form the binary classification task and ...
    feats_s = [class1.T, class2.T]
    # normalize and train the respective svm probabilistic model
    # (ONSET vs SILENCE)
    [feats_s_norm, means_s, stds_s] = aT.normalizeFeatures(feats_s)
    svm = aT.trainSVM(feats_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    prob_on_set = []
    for i in range(st_feats.shape[1]):
        # for each frame: normalize, then take the probability of the
        # second (ONSET) class
        cur_fv = (st_feats[:, i] - means_s) / stds_s
        prob_on_set.append(svm.predict_proba(cur_fv.reshape(1, -1))[0][1])
    prob_on_set = numpy.array(prob_on_set)
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices:
    prob_on_set_sort = numpy.sort(prob_on_set)
    # find probability Threshold as a weighted average
    # of top 10% and lower 10% of the values
    Nt = int(prob_on_set_sort.shape[0] / 10)
    T = (numpy.mean((1 - weight) * prob_on_set_sort[0:Nt]) +
         weight * numpy.mean(prob_on_set_sort[-Nt::]))

    # get the indices of the frames that satisfy the thresholding
    max_idx = numpy.where(prob_on_set > T)[0]

    # Step 4B: group frame indices to onset segments. Indices at most two
    # frames apart belong to the same segment.
    seg_limits = []
    if len(max_idx) > 0:
        first = last = max_idx[0]
        for idx in max_idx[1:]:
            if idx - last <= 2:
                last = idx
            else:
                seg_limits.append([first * st_step, last * st_step])
                first = last = idx
        seg_limits.append([first * st_step, last * st_step])

    # Step 5: Post process: remove very small segments (this also discards
    # every single-frame segment, whose duration is zero).
    min_dur = 0.2
    seg_limits = [s for s in seg_limits if s[1] - s[0] > min_dur]

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(fs), 1.0 / fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        # The title has to be set while the first subplot is still current.
        plt.title('Signal')
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, prob_on_set.shape[0] * st_step, st_step),
                 prob_on_set)
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('svm Probability')
        plt.show()

    return seg_limits
Exemple #9
0
def main(argv):
    """Command-line dispatcher: run the task selected by ``argv[1]``.

    Supported commands (each prints its own usage line when it is given the
    wrong number of arguments):

        -dirMp3toWAV, -dirWAVChangeFs, -featureExtractionFile,
        -beatExtraction, -featureExtractionDir, -featureVisualizationDir,
        -fileSpectrogram, -fileChromagram, -trainClassifier,
        -trainRegression, -classifyFile, -regressionFile, -classifyFolder,
        -regressionFolder, -trainHMMsegmenter_fromfile,
        -trainHMMsegmenter_fromdir, -segmentClassifyFileHMM,
        -segmentClassifyFile, -segmentationEvaluation, -silenceRemoval,
        -speakerDiarization, -speakerDiarizationScriptEval, -thumbnail

    An unrecognised command is ignored silently.

    Args:
        argv: the full argument list; ``argv[0]`` is the program name and
            ``argv[1]`` the command, followed by the command's arguments.

    Returns:
        None. Results are printed, plotted or written to files.

    Raises:
        Exception: when an input file, folder or model named on the command
            line does not exist, or when an argument has an unsupported
            value (see the individual branches).
    """
    # Without a command there is nothing to dispatch on; report it instead of
    # failing with an IndexError on argv[1].
    if len(argv) < 2:
        progName = argv[0] if argv else "<script>"
        print("Error.\nSyntax: " + progName + " <command> [<arguments>]")
        return

    command = argv[1]

    if command == "-dirMp3toWAV":  # convert mp3 to wav (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print("Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100).")
                return
            if argv[4] not in ["1", "2"]:
                print("Error. Number of output channels must be 1 or 2")
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            useMp3TagsAsNames = True
            audioBasicIO.convertDirMP3ToWav(path, int(argv[3]), int(argv[4]),
                                            useMp3TagsAsNames)
        else:
            _printSyntax(argv, " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>")

    elif command == "-dirWAVChangeFs":  # change the sampling rate of wav files (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print("Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100).")
                return
            if argv[4] not in ["1", "2"]:
                print("Error. Number of output channels must be 1 or 2")
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            audioBasicIO.convertFsDirWavToWav(path, int(argv[3]), int(argv[4]))
        else:
            _printSyntax(argv, " -dirWAVChangeFs <dirName> <sampling Freq> <numOfChannels>")

    elif command == "-featureExtractionFile":  # short-term and mid-term feature extraction to files (csv and numpy)
        if len(argv) == 7:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4])
                    and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception(
                    "Mid-term and short-term window sizes and steps must be numbers!"
                )
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            # The output files are named after the input file.
            outFile = wavFileName
            aF.mtFeatureExtractionToFile(wavFileName, mtWin, mtStep, stWin,
                                         stStep, outFile, True, True, True)
        else:
            _printSyntax(argv, " -featureExtractionFile <wavFileName> <mtWin> <mtStep> <stWin> <stStep>")

    elif command == "-beatExtraction":
        if len(argv) == 4:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3])):
                raise Exception("PLOT must be either 0 or 1")
            if not ((int(argv[3]) == 0) or (int(argv[3]) == 1)):
                raise Exception("PLOT must be either 0 or 1")

            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
            BPM, ratio = aF.beatExtraction(F, 0.050, int(argv[3]) == 1)
            print("Beat: {0:d} bpm ".format(int(BPM)))
            print("Ratio: {0:.2f} ".format(ratio))
        else:
            _printSyntax(argv, " -beatExtraction <wavFileName> <PLOT (0 or 1)>")

    elif command == '-featureExtractionDir':  # same as -featureExtractionFile, in a batch mode (i.e. for each WAV file in the provided path)
        if len(argv) == 7:
            path = argv[2]
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4])
                    and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception(
                    "Mid-term and short-term window sizes and steps must be numbers!"
                )
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            aF.mtFeatureExtractionToFileDir(path, mtWin, mtStep, stWin, stStep,
                                            True, True, True)
        else:
            _printSyntax(argv, " -featureExtractionDir <path> <mtWin> <mtStep> <stWin> <stStep>")

    elif command == '-featureVisualizationDir':  # visualize the content relationships between recordings stored in a folder
        if len(argv) == 3:
            if not os.path.isdir(argv[2]):
                raise Exception("Input folder not found!")
            aV.visualizeFeaturesFolder(argv[2], "pca", "")
        else:
            _printSyntax(argv, " -featureVisualizationDir <folderName>")

    elif command == '-fileSpectrogram':  # show spectrogram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            # 40 ms window and step; the returned values are not used here.
            aF.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            _printSyntax(argv, " -fileSpectrogram <fileName>")

    elif command == '-fileChromagram':  # show chromagram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            # 40 ms window and step; the returned values are not used here.
            aF.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            _printSyntax(argv, " -fileChromagram <fileName>")

    elif command == "-trainClassifier":  # Segment classifier training (OK)
        if len(argv) > 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            # Everything between the beat flag and the final model name is a
            # class directory.
            listOfDirs = argv[4:len(argv) - 1]
            modelName = argv[-1]
            aT.featureAndTrain(listOfDirs,
                               1,
                               1,
                               aT.shortTermWindow,
                               aT.shortTermStep,
                               method.lower(),
                               modelName,
                               computeBEAT=beatFeatures)
        else:
            _printSyntax(argv, " -trainClassifier <method(svm or knn)> <beat features> <directory 1> <directory 2> ... <directory N> <modelName>")

    elif command == "-trainRegression":  # Segment regression model
        if len(argv) == 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            dirName = argv[4]
            modelName = argv[5]
            aT.featureAndTrainRegression(dirName,
                                         1,
                                         1,
                                         aT.shortTermWindow,
                                         aT.shortTermStep,
                                         method.lower(),
                                         modelName,
                                         computeBEAT=beatFeatures)
        else:
            _printSyntax(argv, " -trainRegression <method(svm or knn)> <beat features> <directory> <modelName>")

    elif command == "-classifyFile":  # Single File Classification (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            [Result, P,
             classNames] = aT.fileClassification(inputFile, modelName,
                                                 modelType)
            print("{0:s}\t{1:s}".format("Class", "Probability"))
            for i, c in enumerate(classNames):
                print("{0:s}\t{1:.2f}".format(c, P[i]))
            print("Winner class: " + classNames[int(Result)])
        else:
            _printSyntax(argv, " -classifyFile <method(svm or knn)> <modelName> <fileName>")

    elif command == "-regressionFile":  # Single File Regression
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            R, regressionNames = aT.fileRegression(inputFile, modelName,
                                                   modelType)
            for i, value in enumerate(R):
                print("{0:s}\t{1:.3f}".format(regressionNames[i], value))
        else:
            _printSyntax(argv, " -regressionFile <method(svm or knn)> <modelName> <fileName>")

    elif command == "-classifyFolder":  # Directory classification (Ok)
        if len(argv) in (5, 6):
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            # The output mode is optional and defaults to "0".
            outputMode = argv[5] if len(argv) == 6 else "0"

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if outputMode not in ["0", "1"]:
                raise Exception("outputMode has to be 0 or 1")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")

            wavFilesList = _listWavFiles(inputFolder)
            if len(wavFilesList) == 0:
                print("No WAV files found!")
                return
            Results = []
            for wavFile in wavFilesList:
                [Result, P,
                 classNames] = aT.fileClassification(wavFile, modelName,
                                                     modelType)
                Result = int(Result)
                Results.append(Result)
                if outputMode == "1":
                    print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))
            Results = numpy.array(Results)
            # print distribution of classes:
            [Histogram,
             _] = numpy.histogram(Results,
                                  bins=numpy.arange(len(classNames) + 1))
            for i, h in enumerate(Histogram):
                print("{0:20s}\t\t{1:d}".format(classNames[i], h))
        else:
            _printSyntax(argv, " -classifyFolder <method(svm or knn)> <modelName> <folderName> <outputMode(0 or 1)>")

    elif command == "-regressionFolder":  # Regression applied on the WAV files of a folder
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")

            wavFilesList = _listWavFiles(inputFolder)
            if len(wavFilesList) == 0:
                print("No WAV files found!")
                return
            Results = []
            for wavFile in wavFilesList:
                R, regressionNames = aT.fileRegression(wavFile, modelName,
                                                       modelType)
                Results.append(R)
            Results = numpy.array(Results)
            # One histogram per regression target, stacked vertically.
            for i, r in enumerate(regressionNames):
                [Histogram, bins] = numpy.histogram(Results[:, i])
                centers = (bins[0:-1] + bins[1::]) / 2.0
                # Subplot indices are 1-based.
                plt.subplot(len(regressionNames), 1, i + 1)
                plt.plot(centers, Histogram)
                plt.title(r)
            plt.show()
        else:
            _printSyntax(argv, " -regressionFolder <method(svm or knn)> <modelName> <folderName>")

    elif command == '-trainHMMsegmenter_fromfile':
        if len(argv) == 7:
            wavFile = argv[2]
            gtFile = argv[3]
            hmmModelName = argv[4]
            if not uT.isNum(argv[5]):
                print("Error: mid-term window size must be float!")
                return
            if not uT.isNum(argv[6]):
                print("Error: mid-term window step must be float!")
                return
            mtWin = float(argv[5])
            mtStep = float(argv[6])
            if not os.path.isfile(wavFile):
                print("Error: wavfile does not exist!")
                return
            if not os.path.isfile(gtFile):
                print("Error: groundtruth does not exist!")
                return
            aS.trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep)
        else:
            _printSyntax(argv, " -trainHMMsegmenter_fromfile <wavFilePath> <gtSegmentFilePath> <hmmModelFileName> <mtWin> <mtStep>")

    elif command == '-trainHMMsegmenter_fromdir':
        if len(argv) == 6:
            dirPath = argv[2]
            hmmModelName = argv[3]
            # Stop after reporting a bad value; converting it with float()
            # below would otherwise fail.
            if not uT.isNum(argv[4]):
                print("Error: mid-term window size must be float!")
                return
            if not uT.isNum(argv[5]):
                print("Error: mid-term window step must be float!")
                return
            mtWin = float(argv[4])
            mtStep = float(argv[5])
            aS.trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep)
        else:
            _printSyntax(argv, " -trainHMMsegmenter_fromdir <dirPath> <hmmModelFileName> <mtWin> <mtStep>")

    elif command == "-segmentClassifyFileHMM":  # HMM-based segmentation-classification
        if len(argv) == 4:
            hmmModelName = argv[2]
            wavFile = argv[3]
            # The ground-truth file name is derived from the WAV file name.
            gtFile = wavFile.replace('.wav', '.segments')
            aS.hmmSegmentation(wavFile,
                               hmmModelName,
                               PLOT=True,
                               gtFileName=gtFile)
        else:
            _printSyntax(argv, " -segmentClassifyFileHMM <hmmModelName> <fileName>")

    elif command == '-segmentClassifyFile':  # Segmentation-classification (fix-sized segment using knn or svm)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputWavFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputWavFile):
                raise Exception("Input audio file not found!")
            gtFile = inputWavFile.replace('.wav', '.segments')
            aS.mtFileClassification(inputWavFile, modelName, modelType, True,
                                    gtFile)
        else:
            _printSyntax(argv, " -segmentClassifyFile <method(svm or knn)> <modelName> <fileName>")

    elif command == "-segmentationEvaluation":
        if len(argv) == 5:
            methodName = argv[2]
            modelName = argv[3]
            dirName = argv[4]
            aS.evaluateSegmentationClassificationDir(dirName, modelName,
                                                     methodName)
        else:
            _printSyntax(argv, " -segmentationEvaluation <method(svm or knn)> <modelName> <directoryName>")

    elif command == "-silenceRemoval":
        if len(argv) == 5:
            inputFile = argv[2]
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            smoothingWindow = float(argv[3])
            weight = float(argv[4])
            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio signal
            segmentLimits = aS.silenceRemoval(x, Fs, 0.05, 0.05,
                                              smoothingWindow, weight,
                                              False)  # get onsets
            # Write every detected segment to its own file, named after the
            # input file (last four characters dropped) plus the limits.
            for s in segmentLimits:
                strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(
                    inputFile[0:-4], s[0], s[1])
                wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
        else:
            _printSyntax(argv, " -silenceRemoval <inputFile> <smoothinWindow(secs)> <Threshold Weight>")

    elif command == '-speakerDiarization':  # speaker diarization (from file): TODO
        if len(argv) >= 5:
            inputFile = argv[2]
            nSpeakers = int(argv[3])
            useLDA = (int(argv[4]) == 1)
            if useLDA:
                aS.speakerDiarization(inputFile, nSpeakers, PLOT=True)
            else:
                aS.speakerDiarization(inputFile, nSpeakers, LDAdim=0, PLOT=True)
        else:
            _printSyntax(argv, " -speakerDiarization <fileName> <numOfSpeakers> <useLDA (0 or 1)>")

    elif command == "-speakerDiarizationScriptEval":
        if len(argv) >= 3:
            dirName = argv[2]
            listOfLDAs = [int(l) for l in argv[3::]]
            aS.speakerDiarizationEvaluateScript(dirName, listOfLDAs)
        else:
            _printSyntax(argv, " -speakerDiarizationScriptEval <dirName> <LDA dim 1> ... <LDA dim N>")

    elif command == '-thumbnail':  # music thumbnailing (OK)
        if len(argv) == 4:
            inputFile = argv[2]
            stWindow = 1.0
            stStep = 1.0
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read file
            if Fs == -1:  # could not read file
                return
            try:
                thumbnailSize = float(argv[3])
            except ValueError:
                print("Thumbnail size must be a float (in seconds)")
                return
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
                x, Fs, stWindow, stStep,
                thumbnailSize)  # find thumbnail endpoints

            # write thumbnails to WAV files:
            thumbnailFileName1 = inputFile.replace(".wav", "_thumb1.wav")
            thumbnailFileName2 = inputFile.replace(".wav", "_thumb2.wav")
            wavfile.write(thumbnailFileName1, Fs, x[int(Fs * A1):int(Fs * A2)])
            wavfile.write(thumbnailFileName2, Fs, x[int(Fs * B1):int(Fs * B2)])
            print("1st thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(
                thumbnailFileName1, A1, A2))
            print("2nd thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(
                thumbnailFileName2, B1, B2))

            # Plot self-similarity matrix:
            fig = plt.figure()
            ax = fig.add_subplot(111, aspect='auto')
            plt.imshow(Smatrix)
            # Plot best-similarity diagonal:
            Xcenter = (A1 / stStep + A2 / stStep) / 2.0
            Ycenter = (B1 / stStep + B2 / stStep) / 2.0

            e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                            thumbnailSize * 1.4,
                                            3,
                                            angle=45,
                                            linewidth=3,
                                            fill=False)
            ax.add_patch(e1)

            # Dashed guide lines from the thumbnail limits to the plot edges.
            plt.plot([B1, Smatrix.shape[0]], [A1, A1],
                     color='k',
                     linestyle='--',
                     linewidth=2)
            plt.plot([B2, Smatrix.shape[0]], [A2, A2],
                     color='k',
                     linestyle='--',
                     linewidth=2)
            plt.plot([B1, B1], [A1, Smatrix.shape[0]],
                     color='k',
                     linestyle='--',
                     linewidth=2)
            plt.plot([B2, B2], [A2, Smatrix.shape[0]],
                     color='k',
                     linestyle='--',
                     linewidth=2)

            plt.xlim([0, Smatrix.shape[0]])
            plt.ylim([Smatrix.shape[1], 0])

            ax.yaxis.set_label_position("right")
            ax.yaxis.tick_right()

            plt.xlabel('frame no')
            plt.ylabel('frame no')
            plt.title('Self-similarity matrix')

            plt.show()

        else:
            _printSyntax(argv, " -thumbnail <filename> <thumbnailsize(seconds)>")


def _printSyntax(argv, usage):
    """Print the usage error for one command; ``usage`` starts with a space."""
    print("Error.\nSyntax: " + argv[0] + usage)


def _listWavFiles(inputFolder):
    """Return the sorted '*.wav' matches for a folder, or for a path prefix."""
    files = '*.wav'
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        # Not a directory: treat the argument as a prefix of the pattern.
        strFilePattern = inputFolder + files
    return sorted(glob.glob(strFilePattern))
Exemple #10
0
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
import audioTrainTest as aT

# Demo script: extract short-term features from one recording, optionally plot
# the first four feature rows, then classify the same file with an SVM model.
plot = False
[Fs, x] = audioBasicIO.readAudioFile("data/Heavy.wav")
# Window and step are passed in samples: 50 ms window, 25 ms step.
F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.025 * Fs)
# ZCR: The rate of sign-changes of the signal during the duration of a particular frame.
if plot:
    # One panel per feature row (rows 0..3 of F) on a 2x2 grid.
    for panel, ylabel in enumerate(
            ['ZCR', 'Energy', 'Entropy of Energy', 'Spectral Centroid'], 1):
        plt.subplot(2, 2, panel)
        plt.plot(F[panel - 1, :])
        plt.xlabel('Frame no')
        plt.ylabel(ylabel)
    plt.show()

Result, P, classNames = aT.fileClassification(
    "data/Heavy.wav", "data/svmMusicGenre3", "svm")
print(Result)
def main(argv):
    """Run the audio-analysis task selected by ``argv[1]``.

    ``argv`` is laid out like ``sys.argv``: ``argv[0]`` is the program name
    (only used in usage messages), ``argv[1]`` is the task flag (for example
    ``-beatExtraction`` or ``-classifyFile``) and the remaining items are the
    task-specific arguments.  When the number of arguments does not match the
    selected task, the task's usage line is printed and nothing else happens.

    Returns:
        None.  Results are printed, plotted or written to files by the
        selected task.

    Raises:
        Exception: when an input file / folder / model file does not exist,
            or when a model type other than ``svm`` / ``knn`` is given
            (for the tasks that validate these).

    Note:
        Every ``print(...)`` below takes exactly one argument, so the output
        is the same under the Python 2 print statement and the Python 3
        print function.
    """
    if argv[1] == "-dirMp3toWAV":  # convert mp3 to wav (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print("Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100).")
                return
            if argv[4] not in ["1", "2"]:
                print("Error. Number of output channels must be 1 or 2")
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            useMp3TagsAsNames = True
            audioBasicIO.convertDirMP3ToWav(path, int(argv[3]), int(argv[4]), useMp3TagsAsNames)
        else:
            print("Error.\nSyntax: " + argv[0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>")

    elif argv[1] == "-dirWAVChangeFs":  # change sampling rate / channels of the WAV files of a folder (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print("Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100).")
                return
            if argv[4] not in ["1", "2"]:
                print("Error. Number of output channels must be 1 or 2")
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            audioBasicIO.convertFsDirWavToWav(path, int(argv[3]), int(argv[4]))
        else:
            # Usage line names this task (it used to show the -dirMp3toWAV syntax).
            print("Error.\nSyntax: " + argv[0] + " -dirWAVChangeFs <dirName> <sampling Freq> <numOfChannels>")

    elif argv[1] == "-featureExtractionFile":  # short-term and mid-term feature extraction to files (csv and numpy)
        if len(argv) == 7:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception("Mid-term and short-term window sizes and steps must be numbers!")
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            outFile = wavFileName
            aF.mtFeatureExtractionToFile(wavFileName, mtWin, mtStep, stWin, stStep, outFile, True, True, True)
        else:
            print("Error.\nSyntax: " + argv[0] + " -featureExtractionFile <wavFileName> <mtWin> <mtStep> <stWin> <stStep>")

    elif argv[1] == "-beatExtraction":
        if len(argv) == 4:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3])):
                raise Exception("PLOT must be either 0 or 1")
            if not ((int(argv[3]) == 0) or (int(argv[3]) == 1)):
                raise Exception("PLOT must be either 0 or 1")

            Fs, x = audioBasicIO.readAudioFile(wavFileName)
            # 50 ms window and 50 ms step, expressed in samples.
            F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
            BPM, ratio = aF.beatExtraction(F, 0.050, int(argv[3]) == 1)
            print("Beat: {0:d} bpm ".format(int(BPM)))
            print("Ratio: {0:.2f} ".format(ratio))
        else:
            print("Error.\nSyntax: " + argv[0] + " -beatExtraction <wavFileName> <PLOT (0 or 1)>")

    elif argv[1] == '-featureExtractionDir':  # same as -featureExtractionFile, in a batch mode (i.e. for each WAV file in the provided path)
        if len(argv) == 7:
            path = argv[2]
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception("Mid-term and short-term window sizes and steps must be numbers!")
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            aF.mtFeatureExtractionToFileDir(path, mtWin, mtStep, stWin, stStep, True, True, True)
        else:
            print("Error.\nSyntax: " + argv[0] + " -featureExtractionDir <path> <mtWin> <mtStep> <stWin> <stStep>")

    elif argv[1] == '-featureVisualizationDir':  # visualize the content relationships between recordings stored in a folder
        if len(argv) == 3:
            if not os.path.isdir(argv[2]):
                raise Exception("Input folder not found!")
            aV.visualizeFeaturesFolder(argv[2], "pca", "")
        else:
            # A wrong argument count used to be ignored silently.
            print("Error.\nSyntax: " + argv[0] + " -featureVisualizationDir <folderName>")

    elif argv[1] == '-fileSpectrogram':  # show spectrogram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            Fs, x = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            # 40 ms window and step (in samples); the last argument is True as in
            # the other plotting calls -- presumably the PLOT flag.
            aF.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            print("Error.\nSyntax: " + argv[0] + " -fileSpectrogram <fileName>")

    elif argv[1] == '-fileChromagram':  # show chromagram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            Fs, x = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            aF.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            # Usage line names this task (it used to show -fileSpectrogram).
            print("Error.\nSyntax: " + argv[0] + " -fileChromagram <fileName>")

    elif argv[1] == "-trainClassifier":  # Segment classifier training (OK)
        if len(argv) > 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            # Everything between the beat flag and the last argument is a class directory.
            listOfDirs = argv[4:len(argv) - 1]
            modelName = argv[-1]
            aT.featureAndTrain(listOfDirs, 1, 1, aT.shortTermWindow, aT.shortTermStep, method.lower(), modelName, computeBEAT=beatFeatures)
        else:
            print("Error.\nSyntax: " + argv[0] + " -trainClassifier <method(svm or knn)> <beat features> <directory 1> <directory 2> ... <directory N> <modelName>")

    elif argv[1] == "-trainRegression":  # Segment regression model
        if len(argv) == 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            dirName = argv[4]
            modelName = argv[5]
            aT.featureAndTrainRegression(dirName, 1, 1, aT.shortTermWindow, aT.shortTermStep, method.lower(), modelName, computeBEAT=beatFeatures)
        else:
            print("Error.\nSyntax: " + argv[0] + " -trainRegression <method(svm or knn)> <beat features> <directory> <modelName>")

    elif argv[1] == "-classifyFile":  # Single File Classification (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            Result, P, classNames = aT.fileClassification(inputFile, modelName, modelType)
            print("{0:s}\t{1:s}".format("Class", "Probability"))
            for i, c in enumerate(classNames):
                print("{0:s}\t{1:.2f}".format(c, P[i]))
            print("Winner class: " + classNames[int(Result)])
        else:
            print("Error.\nSyntax: " + argv[0] + " -classifyFile <method(svm or knn)> <modelName> <fileName>")

    elif argv[1] == "-regressionFile":  # Single File Regression
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            R, regressionNames = aT.fileRegression(inputFile, modelName, modelType)
            for i in range(len(R)):
                print("{0:s}\t{1:.3f}".format(regressionNames[i], R[i]))
        else:
            print("Error.\nSyntax: " + argv[0] + " -regressionFile <method(svm or knn)> <modelName> <fileName>")

    elif argv[1] == "-classifyFolder":  # Directory classification (Ok)
        if len(argv) == 6 or len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            # The output mode is optional and defaults to "0" (class histogram only).
            if len(argv) == 6:
                outputMode = argv[5]
            else:
                outputMode = "0"

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if outputMode not in ["0", "1"]:
                raise Exception("outputMode has to be 0 or 1")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                # Not a directory: treat the argument as a file-name prefix.
                strFilePattern = inputFolder + files

            wavFilesList = sorted(glob.glob(strFilePattern))
            if len(wavFilesList) == 0:
                print("No WAV files found!")
                return
            Results = []
            for wavFile in wavFilesList:
                Result, P, classNames = aT.fileClassification(wavFile, modelName, modelType)
                Result = int(Result)
                Results.append(Result)
                if outputMode == "1":
                    print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))
            Results = numpy.array(Results)
            # print distribution of classes:
            Histogram, _ = numpy.histogram(Results, bins=numpy.arange(len(classNames) + 1))
            for i, h in enumerate(Histogram):
                print("{0:20s}\t\t{1:d}".format(classNames[i], h))
        else:
            print("Error.\nSyntax: " + argv[0] + " -classifyFolder <method(svm or knn)> <modelName> <folderName> <outputMode(0 or 1)>")

    elif argv[1] == "-regressionFolder":  # Regression applied on the WAV files of a folder
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")

            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                strFilePattern = inputFolder + files

            wavFilesList = sorted(glob.glob(strFilePattern))
            if len(wavFilesList) == 0:
                print("No WAV files found!")
                return
            Results = []
            for wavFile in wavFilesList:
                R, regressionNames = aT.fileRegression(wavFile, modelName, modelType)
                Results.append(R)
            Results = numpy.array(Results)
            # One histogram panel per regression target.
            for i, r in enumerate(regressionNames):
                Histogram, bins = numpy.histogram(Results[:, i])
                centers = (bins[0:-1] + bins[1::]) / 2.0
                # Subplot indices are 1-based, hence i + 1.
                plt.subplot(len(regressionNames), 1, i + 1)
                plt.plot(centers, Histogram)
                plt.title(r)
            plt.show()
        else:
            print("Error.\nSyntax: " + argv[0] + " -regressionFolder <method(svm or knn)> <modelName> <folderName>")

    elif argv[1] == '-trainHMMsegmenter_fromfile':
        if len(argv) == 7:
            wavFile = argv[2]
            gtFile = argv[3]
            hmmModelName = argv[4]
            if not uT.isNum(argv[5]):
                print("Error: mid-term window size must be float!")
                return
            if not uT.isNum(argv[6]):
                print("Error: mid-term window step must be float!")
                return
            mtWin = float(argv[5])
            mtStep = float(argv[6])
            if not os.path.isfile(wavFile):
                print("Error: wavfile does not exist!")
                return
            if not os.path.isfile(gtFile):
                print("Error: groundtruth does not exist!")
                return
            aS.trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep)
        else:
            print("Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromfile <wavFilePath> <gtSegmentFilePath> <hmmModelFileName> <mtWin> <mtStep>")

    elif argv[1] == '-trainHMMsegmenter_fromdir':
        if len(argv) == 6:
            dirPath = argv[2]
            hmmModelName = argv[3]
            # Stop after reporting a bad number; otherwise float() below would
            # raise right after the error message was printed.
            if not uT.isNum(argv[4]):
                print("Error: mid-term window size must be float!")
                return
            if not uT.isNum(argv[5]):
                print("Error: mid-term window step must be float!")
                return
            mtWin = float(argv[4])
            mtStep = float(argv[5])
            aS.trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep)
        else:
            print("Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromdir <dirPath> <hmmModelFileName> <mtWin> <mtStep>")

    elif argv[1] == "-segmentClassifyFileHMM":  # HMM-based segmentation-classification
        if len(argv) == 4:
            hmmModelName = argv[2]
            wavFile = argv[3]
            # Ground-truth segments are looked up next to the WAV file.
            gtFile = wavFile.replace('.wav', '.segments')
            aS.hmmSegmentation(wavFile, hmmModelName, PLOT=True, gtFileName=gtFile)
        else:
            # Usage line now shows the flag that is actually accepted.
            print("Error.\nSyntax: " + argv[0] + " -segmentClassifyFileHMM <hmmModelName> <fileName>")

    elif argv[1] == '-segmentClassifyFile':  # Segmentation-classification (fix-sized segment using knn or svm)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputWavFile = argv[4]

            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputWavFile):
                raise Exception("Input audio file not found!")
            gtFile = inputWavFile.replace('.wav', '.segments')
            aS.mtFileClassification(inputWavFile, modelName, modelType, True, gtFile)
        else:
            print("Error.\nSyntax: " + argv[0] + " -segmentClassifyFile <method(svm or knn)> <modelName> <fileName>")

    elif argv[1] == "-segmentationEvaluation":
        if len(argv) == 5:
            methodName = argv[2]
            modelName = argv[3]
            dirName = argv[4]
            aS.evaluateSegmentationClassificationDir(dirName, modelName, methodName)
        else:
            print("Error.\nSyntax: " + argv[0] + " -segmentationEvaluation <method(svm or knn)> <modelName> <directoryName>")

    elif argv[1] == "-silenceRemoval":
        if len(argv) == 5:
            inputFile = argv[2]
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            smoothingWindow = float(argv[3])
            weight = float(argv[4])
            Fs, x = audioBasicIO.readAudioFile(inputFile)  # read audio signal
            segmentLimits = aS.silenceRemoval(x, Fs, 0.05, 0.05, smoothingWindow, weight, False)  # get onsets
            # Write each detected segment to its own WAV file; s[0] and s[1]
            # are the segment limits in seconds.
            for s in segmentLimits:
                strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(inputFile[0:-4], s[0], s[1])
                wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
        else:
            print("Error.\nSyntax: " + argv[0] + " -silenceRemoval <inputFile> <smoothinWindow(secs)> <Threshold Weight>")

    elif argv[1] == '-speakerDiarization':  # speaker diarization (from file): TODO
        # Extra trailing arguments are still tolerated, as before; too few
        # arguments now print the usage instead of raising IndexError.
        if len(argv) >= 5:
            inputFile = argv[2]
            nSpeakers = int(argv[3])
            useLDA = (int(argv[4]) == 1)
            if useLDA:
                aS.speakerDiarization(inputFile, nSpeakers, PLOT=True)
            else:
                aS.speakerDiarization(inputFile, nSpeakers, LDAdim=0, PLOT=True)
        else:
            print("Error.\nSyntax: " + argv[0] + " -speakerDiarization <fileName> <numOfSpeakers> <useLDA(0 or 1)>")

    elif argv[1] == "-speakerDiarizationScriptEval":
        if len(argv) >= 3:
            dirName = argv[2]
            listOfLDAs = [int(l) for l in argv[3::]]
            aS.speakerDiarizationEvaluateScript(dirName, listOfLDAs)
        else:
            print("Error.\nSyntax: " + argv[0] + " -speakerDiarizationScriptEval <dirName> <LDAdim 1> ... <LDAdim N>")

    elif argv[1] == '-thumbnail':  # music thumbnailing (OK)
        if len(argv) == 4:
            inputFile = argv[2]
            stWindow = 1.0
            stStep = 1.0
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")

            Fs, x = audioBasicIO.readAudioFile(inputFile)  # read file
            if Fs == -1:  # could not read file
                return
            try:
                thumbnailSize = float(argv[3])
            except ValueError:
                print("Thumbnail size must be a float (in seconds)")
                return
            A1, A2, B1, B2, Smatrix = aS.musicThumbnailing(x, Fs, stWindow, stStep, thumbnailSize)  # find thumbnail endpoints

            # write thumbnails to WAV files:
            thumbnailFileName1 = inputFile.replace(".wav", "_thumb1.wav")
            thumbnailFileName2 = inputFile.replace(".wav", "_thumb2.wav")
            wavfile.write(thumbnailFileName1, Fs, x[int(Fs * A1):int(Fs * A2)])
            wavfile.write(thumbnailFileName2, Fs, x[int(Fs * B1):int(Fs * B2)])
            print("1st thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName1, A1, A2))
            print("2nd thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName2, B1, B2))

            # Plot self-similarity matrix:
            fig = plt.figure()
            ax = fig.add_subplot(111, aspect='auto')
            plt.imshow(Smatrix)
            # Plot best-similarity diagonal:
            Xcenter = (A1 / stStep + A2 / stStep) / 2.0
            Ycenter = (B1 / stStep + B2 / stStep) / 2.0

            e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter), thumbnailSize * 1.4, 3,
                                            angle=45, linewidth=3, fill=False)
            ax.add_patch(e1)

            # Dashed guide lines from the thumbnail limits to the matrix border.
            plt.plot([B1, Smatrix.shape[0]], [A1, A1], color='k', linestyle='--', linewidth=2)
            plt.plot([B2, Smatrix.shape[0]], [A2, A2], color='k', linestyle='--', linewidth=2)
            plt.plot([B1, B1], [A1, Smatrix.shape[0]], color='k', linestyle='--', linewidth=2)
            plt.plot([B2, B2], [A2, Smatrix.shape[0]], color='k', linestyle='--', linewidth=2)

            plt.xlim([0, Smatrix.shape[0]])
            plt.ylim([Smatrix.shape[1], 0])

            ax.yaxis.set_label_position("right")
            ax.yaxis.tick_right()

            plt.xlabel('frame no')
            plt.ylabel('frame no')
            plt.title('Self-similarity matrix')

            plt.show()
        else:
            print("Error.\nSyntax: " + argv[0] + " -thumbnail <filename> <thumbnailsize(seconds)>")
import random as rn
import math
import operator
import numpy as np
import audioBasicIO
import audioFeatureExtraction

# Walk the "angry" test folder, extract short-term features from every file
# found, append a constant label row (value 3) and save each result as
# an<N>.csv, where N counts the processed files starting at 1.
dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angry"
c = 1
for folder, _subdirs, names in os.walk(dire):
    for name in names:
        wav_path = folder + "\\" + str(name)
        [Fs, x] = audioBasicIO.readAudioFile(wav_path)
        # Window of 0.50 * Fs samples, step of 0.25 * Fs samples.
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.50 * Fs,
                                                       0.25 * Fs)
        # One label per column of F, stacked below the feature rows.
        label_row = [3] * len(F[0])
        G = np.append(F, [label_row], axis=0)
        loc = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angrycsv\an" + str(
            c) + ".csv"
        c += 1
        np.savetxt(loc, G, delimiter=",")

# Same walk as above, now over the "sad" test folder; the file counter restarts at 1.
# NOTE(review): the loop body is cut off in this excerpt after `label = []`.
dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\sad"
c = 1

for filename in os.walk(dire):
    # filename is the (dirpath, dirnames, filenames) tuple yielded by os.walk;
    # index 2 holds the plain file names of that directory.
    for x in filename[2]:
        label = []
Exemple #13
0
height = 20
k = 0
col, avg, med, std, maxm, minm = [], [], [], [], [], []

emo_labels, signal_data, filename = dirWav(
    '/media/shreya/New Volume1/datasets/EMO-DB/wav/', '*.wav')

output = np.asarray(emo_labels)

features = []
feat = []
feature = []
length_features = []

for i in signal_data:
    temp = audioFeatureExtraction.stFeatureExtraction(i, 16000, 1024, 1024)
    print 'temp', temp.shape
    avg.append(temp.mean(axis=1))
    med.append(np.median(temp, axis=1))
    std.append(np.std(temp, axis=1))
    maxm.append(np.amax(temp, axis=1))
    minm.append(np.amin(temp, axis=1))

mean = np.asarray(avg)
median = np.asarray(med)
maximum = np.asarray(maxm)
minimum = np.asarray(minm)
standard_deviation = np.asarray(std)


def reduce_zeroOneNorm(arr):
Exemple #14
0
#!/usr/bin/env python2.7
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
def _mono_short_term_features(wav_path):
    """Read wav_path, mix it down to mono and extract short-term features.

    Returns the sampling rate, the mono signal and the feature matrix
    (50 ms window, 25 ms step, both passed in samples).
    """
    rate, signal = audioBasicIO.readAudioFile(wav_path)
    signal = audioBasicIO.stereo2mono(signal)
    feats = audioFeatureExtraction.stFeatureExtraction(signal, rate,
                                                       0.050 * rate,
                                                       0.025 * rate)
    return rate, signal, feats


Fs_x, x, F_x = _mono_short_term_features("emer/1.wav")

Fs_y, y, F_y = _mono_short_term_features("nonemer/9.wav")

# First figure: feature row 0 (labelled ZCR) of both recordings, stacked.
for row, (feats, tag) in enumerate(((F_x, 'emer'), (F_y, 'nonemer')), 1):
    plt.subplot(2, 1, row)
    plt.plot(feats[0, :])
    plt.xlabel(tag)
    plt.ylabel('ZCR')
plt.show()

# Second figure: feature row 1 (labelled Energy) of both recordings, stacked.
# NOTE(review): this excerpt is cut off here -- the y-label of the lower panel
# and the call that displays the figure are not visible.
plt.subplot(2, 1, 1)
plt.plot(F_x[1, :])
plt.xlabel('emer')
plt.ylabel('Energy')
plt.subplot(2, 1, 2)
plt.plot(F_y[1, :])
plt.xlabel('nonemer')
	def analyzeFeatures(self, data):
		"""Print every short-term feature of ``data`` next to its name.

		Features are extracted with a window and step of 0.1 * self.Fs samples.

		NOTE(review): assumes aF.stFeatureExtraction returns the pair
		(feature values, feature names), with one row of values per name --
		confirm against the installed library version.
		"""
		values, names = aF.stFeatureExtraction(data, self.Fs, 0.1 * self.Fs, 0.1 * self.Fs)
		# Pair each name with its own row of values; the previous loop printed
		# the name twice and never used the extracted values.
		for name, row in zip(names, values):
			print("%s:\t%s" % (name, row))
def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            F = audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("short-term feature extraction: {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aT.fileClassification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Mid-term feature extraction + classification \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [flagsInd, classesAll, acc] = aS.mtFileClassification(
                "diarizationExample.wav", "svmSM", "svm", False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Fix-sized classification - segmentation \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aS.hmmSegmentation('diarizationExample.wav',
                               'hmmRadioSM', False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("HMM-based classification - segmentation \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            segments = aS.silenceRemoval(
                x, Fs, 0.050, 0.050, smoothWindow=1.0, Weight=0.3, plot=False)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Silence removal \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
                x1, Fs1, 1.0, 1.0, 15.0)  # find thumbnail endpoints
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Thumbnail \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav",
                                  4, LDAdim=0, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))