def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    '''
    Classify every WAV file matched by inputFolder and aggregate the results.

    Each file is classified with aT.fileClassification(). The class
    probabilities it returns are weighted by the duration of the file
    (number of samples / sampling rate) and accumulated over all files; the
    accumulated vector is finally normalized to sum to 1.

    ARGUMENTS:
        - inputFolder:  directory that holds the WAV files. If it is not an
                        existing directory it is used as a plain prefix, i.e.
                        the glob pattern becomes inputFolder + "*.wav"
        - modelType:    'svm' or 'knn'
        - modelName:    path of the trained model file
        - outputMode:   if True, print the label of every file and the number
                        of files per class, and show a bar plot of the
                        aggregated probabilities

    RETURNS:
        - classNames:   class names, as returned by the last call to
                        aT.fileClassification()
        - PsAll:        numpy array with one duration-weighted, normalized
                        probability per class (left as all zeros when the
                        accumulated probabilities sum to 0)
        Nothing (None) is returned when no WAV file is matched.

    RAISES:
        - Exception:    modelName is not an existing file
        - ValueError:   modelType is neither 'svm' nor 'knn'
    '''
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    # Reject unknown model types explicitly; otherwise classNames would be
    # unbound further down.
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        raise ValueError("Unsupported modelType (expected 'svm' or 'knn'): {0!r}".format(modelType))

    # Only the class names (4th element of the loaded model) are needed here,
    # to size the accumulator.
    classNames = loadModel(modelName)[3]
    PsAll = numpy.zeros((len(classNames), ))

    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, "*.wav")
    else:
        strFilePattern = inputFolder + "*.wav"

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return

    Results = []
    for wavFile in wavFilesList:
        # The file is read here only to get its duration in seconds.
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += numpy.array(P) * signalLength
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))

    if outputMode:
        # Distribution of the winner classes: one bin per class index.
        [Histogram, _] = numpy.histogram(numpy.array(Results), bins=numpy.arange(len(classNames) + 1))
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    # Normalize, avoiding a division by zero when nothing was accumulated.
    totalWeight = numpy.sum(PsAll)
    if totalWeight > 0:
        PsAll = PsAll / totalWeight

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.arange(len(classNames) + 1))
        ax.set_xticklabels([" "] + list(classNames))
        ax.bar(numpy.arange(len(classNames)) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    '''
    Classify every WAV file matched by inputFolder and aggregate the results.

    Each file is classified with aT.fileClassification(). The class
    probabilities it returns are weighted by the duration of the file
    (number of samples / sampling rate) and accumulated over all files; the
    accumulated vector is finally normalized to sum to 1.

    ARGUMENTS:
        - inputFolder:  directory that holds the WAV files. If it is not an
                        existing directory it is used as a plain prefix, i.e.
                        the glob pattern becomes inputFolder + "*.wav"
        - modelType:    'svm' or 'knn'
        - modelName:    path of the trained model file
        - outputMode:   if True, print the label of every file and the number
                        of files per class, and show a bar plot of the
                        aggregated probabilities

    RETURNS:
        - classNames:   class names, as returned by the last call to
                        aT.fileClassification()
        - PsAll:        numpy array with one duration-weighted, normalized
                        probability per class (left as all zeros when the
                        accumulated probabilities sum to 0)
        Nothing (None) is returned when no WAV file is matched.

    RAISES:
        - Exception:    modelName is not an existing file
        - ValueError:   modelType is neither 'svm' nor 'knn'
    '''
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    # Reject unknown model types explicitly; otherwise classNames would be
    # unbound further down.
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        raise ValueError("Unsupported modelType (expected 'svm' or 'knn'): {0!r}".format(modelType))

    # Only the class names (4th element of the loaded model) are needed here,
    # to size the accumulator.
    classNames = loadModel(modelName)[3]
    PsAll = numpy.zeros((len(classNames), ))

    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, "*.wav")
    else:
        strFilePattern = inputFolder + "*.wav"

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return

    Results = []
    for wavFile in wavFilesList:
        # The file is read here only to get its duration in seconds.
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += numpy.array(P) * signalLength
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))

    if outputMode:
        # Distribution of the winner classes: one bin per class index.
        [Histogram, _] = numpy.histogram(numpy.array(Results), bins=numpy.arange(len(classNames) + 1))
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    # Normalize, avoiding a division by zero when nothing was accumulated.
    totalWeight = numpy.sum(PsAll)
    if totalWeight > 0:
        PsAll = PsAll / totalWeight

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.arange(len(classNames) + 1))
        ax.set_xticklabels([" "] + list(classNames))
        ax.bar(numpy.arange(len(classNames)) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
def getMusicSegmentsFromFile(inputFile, modelType="svm", modelName="data/svmMovies8classes"):
    '''
    Extract the segments classified as "Music" from an audio file and store
    each of them as a separate WAV file.

    The file is segmented with aS.mtFileClassification() and the per-window
    flags are merged into segments with aS.flags2segs(). Every segment whose
    class name is "Music" and that lasts at least minDuration seconds is
    written to <inputFile without extension>_musicSegments/<start>-<end>.wav.
    An already existing output directory is deleted and re-created, but only
    after the classification has succeeded.

    NOTE: minDuration is not defined in this function; it is looked up in the
    enclosing module scope.

    ARGUMENTS:
        - inputFile:    path of the input audio file
        - modelType:    'svm' (default) or 'knn'
        - modelName:    path of the classification model
                        (default: "data/svmMovies8classes")

    RAISES:
        - ValueError:   modelType is neither 'svm' nor 'knn'
        - Exception:    the classification step reported a failure
    '''
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        raise ValueError("Unsupported modelType (expected 'svm' or 'knn'): {0!r}".format(modelType))

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    # The model is loaded only for its mid-term step (6th element), which is
    # needed to convert window flags to segment limits.
    mtStep = loadModel(modelName)[5]

    flagsInd, classNames, acc = aS.mtFileClassification(inputFile, modelName, modelType, plotResults=False, gtFile="")
    # mtFileClassification signals failure by returning -1 in every slot;
    # stop here instead of feeding the sentinel to flags2segs.
    if isinstance(flagsInd, int) and flagsInd == -1:
        raise Exception("Segment classification failed for " + inputFile)
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    # Reset the output directory only now, so that a failed run does not
    # destroy the results of a previous one.
    dirOutput = os.path.splitext(inputFile)[0] + "_musicSegments"
    if os.path.exists(dirOutput):
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    for i, s in enumerate(segs):
        isMusic = classNames[int(classes[i])] == "Music"
        if isMusic and (s[1] - s[0] >= minDuration):
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep, s[0], s[1])
            # segment limits are in seconds: convert them to sample indices
            wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
# Esempio n. 4  (Italian for "Example no. 4": separator left over from a code-example listing)
# 0
def getMusicSegmentsFromFile(inputFile,
                             modelType="svm",
                             modelName="data/svmMovies8classes"):
    '''
    Extract the segments classified as "Music" from an audio file and store
    each of them as a separate WAV file.

    The file is segmented with aS.mtFileClassification() and the per-window
    flags are merged into segments with aS.flags2segs(). Every segment whose
    class name is "Music" and that lasts at least minDuration seconds is
    written to <inputFile without extension>_musicSegments/<start>-<end>.wav.
    An already existing output directory is deleted and re-created, but only
    after the classification has succeeded.

    NOTE: minDuration is not defined in this function; it is looked up in the
    enclosing module scope.

    ARGUMENTS:
        - inputFile:    path of the input audio file
        - modelType:    'svm' (default) or 'knn'
        - modelName:    path of the classification model
                        (default: "data/svmMovies8classes")

    RAISES:
        - ValueError:   modelType is neither 'svm' nor 'knn'
        - Exception:    the classification step reported a failure
    '''
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        raise ValueError(
            "Unsupported modelType (expected 'svm' or 'knn'): {0!r}".format(
                modelType))

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    # The model is loaded only for its mid-term step (6th element), which is
    # needed to convert window flags to segment limits.
    mtStep = loadModel(modelName)[5]

    # This variant expects four return values from aS.mtFileClassification.
    flagsInd, classNames, acc, CM = aS.mtFileClassification(inputFile,
                                                            modelName,
                                                            modelType,
                                                            plotResults=False,
                                                            gtFile="")
    # NOTE(review): assumes failures are reported as -1 in the first slot,
    # as the 3-value variant of mtFileClassification does -- confirm against
    # the aS version in use.
    if isinstance(flagsInd, int) and flagsInd == -1:
        raise Exception("Segment classification failed for " + inputFile)
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    # Reset the output directory only now, so that a failed run does not
    # destroy the results of a previous one.
    dirOutput = os.path.splitext(inputFile)[0] + "_musicSegments"
    if os.path.exists(dirOutput):
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    for i, s in enumerate(segs):
        isMusic = classNames[int(classes[i])] == "Music"
        if isMusic and (s[1] - s[0] >= minDuration):
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep,
                                                       s[0], s[1])
            # segment limits are in seconds: convert them to sample indices
            wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
# Esempio n. 5  (Italian for "Example no. 5": separator left over from a code-example listing)
# 0
def mtFileClassification(inputFile, modelName, modelType, plotResults = False, gtFile = ""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    Every mid-term feature vector is normalized with the model's MEAN / STD and
    classified; isolated flags whose two neighbours agree are then replaced by
    the neighbours' class (1-window smoothing).

    ARGUMENTS:
        - inputFile:        path of the input WAV file
        - modelName:        name of the classification model
        - modelType:        svm or knn depending on the classifier type
        - plotResults:      its negation is forwarded as the last argument of
                            plotSegmentationResults()
        - gtFile:           path of a ground-truth segment file (optional).
                            If it exists, its labels are mapped to the class
                            indices of the model (-1 for labels the model
                            does not know)

    RETURNS:
        - flagsInd:         numpy array with the (smoothed) class index of every mid-term window
        - classNames:       class names stored in the model
        - acc:              value returned by plotSegmentationResults(); it is printed as
                            "Overall Accuracy" when it is non-negative
        On any error (missing model file, unsupported modelType, a model that
        uses beat features, unreadable input file) (-1, -1, -1) is returned.
    '''

    if not os.path.isfile(modelName):
        print("mtFileClassificationError: input modelName not found!")
        return (-1, -1, -1)
    # Load classifier:
    if modelType == 'svm':
        loadModel = aT.loadSVModel
    elif modelType == 'knn':
        loadModel = aT.loadKNNModel
    else:
        print("mtFileClassificationError: unsupported modelType (expected 'svm' or 'knn')")
        return (-1, -1, -1)
    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = loadModel(modelName)
    if computeBEAT:
        print("Model " + modelName + " contains long-term music features (beat etc) and cannot be used in segmentation")
        return (-1, -1, -1)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)                 # load input file
    if Fs == -1:                                                    # could not read file
        return (-1, -1, -1)
    x = audioBasicIO.stereo2mono(x)                                 # convert stereo (if) to mono
    # mid-term feature extraction (window and step sizes converted from seconds to samples):
    [MidTermFeatures, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep))
    flagsInd = []
    for i in range(MidTermFeatures.shape[1]):                       # for each feature vector (i.e. for each fix-sized segment):
        curFV = (MidTermFeatures[:, i] - MEAN) / STD                # normalize current feature vector
        [Result, P] = aT.classifierWrapper(Classifier, modelType, curFV)    # classify vector
        flagsInd.append(Result)
    flagsInd = numpy.array(flagsInd)

    # 1-window smoothing (done in place, so a flag changed here is what the
    # next iteration sees as its left neighbour)
    for i in range(1, len(flagsInd) - 1):
        if flagsInd[i - 1] == flagsInd[i + 1]:
            flagsInd[i] = flagsInd[i + 1]

    # Load ground-truth:
    if os.path.isfile(gtFile):
        [segStartGT, segEndGT, segLabelsGT] = readSegmentGT(gtFile)
        flagsGT, classNamesGT = segs2flags(segStartGT, segEndGT, segLabelsGT, mtStep)
        flagsIndGT = []
        for fl in flagsGT:                                          # "align" labels with GT
            labelGT = classNamesGT[fl]
            if labelGT in classNames:
                flagsIndGT.append(classNames.index(labelGT))
            else:
                flagsIndGT.append(-1)
        flagsIndGT = numpy.array(flagsIndGT)
    else:
        flagsIndGT = numpy.array([])
    acc = plotSegmentationResults(flagsInd, flagsIndGT, classNames, mtStep, not plotResults)
    if acc >= 0:
        print("Overall Accuracy: {0:.3f}".format(acc))
    return (flagsInd, classNames, acc)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec,
                       modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function is used to record and analyze audio segments, in a fix window basis.
    Samples are read from an ALSA capture device and gathered in a mid-term
    buffer. Every time the buffer is full it is (optionally) classified,
    plotted and appended to the recording; when at least duration * Fs samples
    have been gathered, the recording is written to outputWavFile.

    NOTE: Fs (sampling rate) is not defined in this function; it is looked up
    in the enclosing module scope.

    ARGUMENTS:
        - duration              total recording duration
        - outputWavFile         path of the output WAV file
        - midTermBufferSizeSec  (fix)segment length in seconds, must be positive
        - modelName             classification model name
        - modelType             classification model type: 'svm' or 'knn' to classify
                                every segment, 'neuralnet' to hand the whole job over to
                                neuralNetClassidication(); with any other value the audio
                                is only recorded and plotted

    RAISES:
        - ValueError:           midTermBufferSizeSec does not correspond to at least one sample
    '''
    if modelType == 'neuralnet':
        neuralNetClassidication(duration, midTermBufferSizeSec, modelName)
        return

    # A buffer of zero (or negative) size can never be filled and consumed,
    # which would make the recording loop below spin forever.
    if midTermBufferSizeSec <= 0:
        raise ValueError("midTermBufferSizeSec must be positive")

    if modelType == 'svm':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT
        ] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT
        ] = aT.loadKNNModel(modelName)
    else:
        Classifier = None

    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    if midTermBufferSize < 1:
        raise ValueError("midTermBufferSizeSec is shorter than one sample")

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)

    allData = []
    midTermBuffer = []
    curWindow = []

    while len(allData) < duration * Fs:
        # Read data from device
        l, data = inp.read()
        if l:
            for i in range(l):
                curWindow.append(audioop.getsample(data, 2, i))
            # Move to the mid-term buffer only as many samples as still fit;
            # the rest stays in curWindow for the next segment.
            samplesToCopyToMidBuffer = min(
                len(curWindow), midTermBufferSize - len(midTermBuffer))
            midTermBuffer.extend(curWindow[0:samplesToCopyToMidBuffer])
            del curWindow[0:samplesToCopyToMidBuffer]
        if len(midTermBuffer) == midTermBufferSize:
            if Classifier is not None:
                # NOTE: fixed 2.0 s mid-term and 0.020 s short-term
                # window/step are used here, not the values stored in
                # the model.
                [mtFeatures, stFeatures
                 ] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0 * Fs,
                                            2.0 * Fs, 0.020 * Fs,
                                            0.020 * Fs)
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, P] = aT.classifierWrapper(Classifier, modelType,
                                                   curFV)
                print(classNames[int(result)])
            allData.extend(midTermBuffer)

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            # start a fresh buffer (the plotted list is left untouched)
            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType):
    """
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function is used to record and analyze audio segments, in a fix window basis.
    Samples are read from an ALSA capture device and gathered in a mid-term
    buffer. Every time the buffer is full it is (optionally) classified,
    plotted and appended to the recording; when at least duration * Fs samples
    have been gathered, the recording is written to outputWavFile.

    NOTE: Fs (sampling rate) is not defined in this function; it is looked up
    in the enclosing module scope.

    ARGUMENTS:
        - duration              total recording duration
        - outputWavFile         path of the output WAV file
        - midTermBufferSizeSec  (fix)segment length in seconds, must be positive
        - modelName             classification model name
        - modelType             classification model type: 'svm' or 'knn' to classify
                                every segment; with any other value the audio is only
                                recorded and plotted

    RAISES:
        - ValueError:           midTermBufferSizeSec does not correspond to at least one sample
    """
    # A buffer of zero (or negative) size can never be filled and consumed,
    # which would make the recording loop below spin forever.
    if midTermBufferSizeSec <= 0:
        raise ValueError("midTermBufferSizeSec must be positive")

    if modelType == "svm":
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == "knn":
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadKNNModel(modelName)
    else:
        Classifier = None

    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    if midTermBufferSize < 1:
        raise ValueError("midTermBufferSizeSec is shorter than one sample")

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)

    allData = []
    midTermBuffer = []
    curWindow = []

    while len(allData) < duration * Fs:
        # Read data from device
        l, data = inp.read()
        if l:
            for i in range(l):
                curWindow.append(audioop.getsample(data, 2, i))
            # Move to the mid-term buffer only as many samples as still fit;
            # the rest stays in curWindow for the next segment.
            samplesToCopyToMidBuffer = min(len(curWindow), midTermBufferSize - len(midTermBuffer))
            midTermBuffer.extend(curWindow[0:samplesToCopyToMidBuffer])
            del curWindow[0:samplesToCopyToMidBuffer]
        if len(midTermBuffer) == midTermBufferSize:
            if Classifier is not None:
                # NOTE: fixed 2.0 s mid-term and 0.020 s short-term window/step
                # are used here, not the values stored in the model.
                [mtFeatures, stFeatures] = aF.mtFeatureExtraction(
                    midTermBuffer, Fs, 2.0 * Fs, 2.0 * Fs, 0.020 * Fs, 0.020 * Fs
                )
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, P] = aT.classifierWrapper(Classifier, modelType, curFV)
                print(classNames[int(result)])
            allData.extend(midTermBuffer)

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            # start a fresh buffer (the plotted list is left untouched)
            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)