def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    '''
    Classify every WAV file matching a folder (or path prefix) and aggregate
    the per-file results.

    ARGUMENTS:
        - inputFolder:    directory scanned for "*.wav" files; when it is not
                          an existing directory it is used as a raw path
                          prefix to which "*.wav" is appended
        - modelType:      'svm' or 'knn'
        - modelName:      path of the classification model file
        - outputMode:     when True, print the label of each file and the
                          class histogram, and show a bar plot of the
                          aggregated probabilities

    RETURNS:
        - classNames:     class names reported by the classifier
        - PsAll:          per-class probabilities, weighted by the duration
                          of each file and normalised to sum to 1 (left as
                          all zeros when the weighted sum is zero)
        None is returned, after printing a message, when no WAV file matches.

    RAISES:
        - Exception:      modelName is not an existing file
        - ValueError:     modelType is neither 'svm' nor 'knn'
    '''
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    if modelType == 'svm':
        modelParams = aT.load_model(modelName)
    elif modelType == 'knn':
        modelParams = aT.load_model_knn(modelName)
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            "Unknown modelType {0!r}: expected 'svm' or 'knn'".format(modelType))
    # Only the class names (4th element of the loaded model) are needed here;
    # the classification itself is delegated to aT.fileClassification below.
    classNames = modelParams[3]

    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return None

    Results = []
    for wavFile in wavFilesList:
        # The file is read here only to weight its probabilities by duration.
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))

    if outputMode:
        # print distribution of classes:
        [Histogram, _] = numpy.histogram(numpy.array(Results),
                                         bins=numpy.arange(len(classNames) + 1))
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    # Normalise to a distribution; skip the division when nothing was
    # accumulated so the result is not filled with NaN.
    total = numpy.sum(PsAll)
    if total > 0:
        PsAll = PsAll / total

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.array(range(len(classNames) + 1)))
        ax.set_xticklabels([" "] + list(classNames))
        ax.bar(numpy.array(range(len(classNames))) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
Esempio n. 2
0
def getMusicSegmentsFromFile(inputFile):
    '''
    Save every sufficiently long "Music" segment of an audio file as a
    separate WAV file.

    The output folder is the input path with its last four characters
    dropped and "_musicSegments" appended; an existing folder of that name
    is removed first. Segments whose length is below `minDuration` (a name
    resolved outside this function) are skipped.

    ARGUMENTS:
        - inputFile:    path of the audio file to be segmented
    '''
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    # Start from an empty output folder (never wipe the current directory).
    dirOutput = inputFile[0:-4] + "_musicSegments"
    if dirOutput != "." and os.path.exists(dirOutput):
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    # The model is loaded here only to obtain its mid-term step.
    if modelType == 'svm':
        (_, _, _, classNames, _, mtStep, _, _, _) = aT.load_model(modelName)
    elif modelType == 'knn':
        (_, _, _, classNames, _, mtStep, _, _, _) = aT.load_model_knn(modelName)

    flagsInd, classNames, acc, CM = aS.mtFileClassification(
        inputFile, modelName, modelType, plotResults=False, gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    for segIdx, seg in enumerate(segs):
        if classNames[int(classes[segIdx])] != "Music":
            continue
        segStart, segEnd = seg[0], seg[1]
        if not (segEnd - segStart >= minDuration):
            continue
        # File name encodes the segment limits in seconds.
        strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep,
                                                   segStart, segEnd)
        wavfile.write(strOut, Fs, x[int(Fs * segStart):int(Fs * segEnd)])
def getMusicSegmentsFromFile(inputFile):
    '''
    Split an audio file into classified segments and write the "Music" ones
    to individual WAV files.

    Output goes to a folder named after the input path (last four characters
    removed) plus "_musicSegments"; a pre-existing folder with that name is
    deleted. Only segments lasting at least `minDuration` (a name resolved
    outside this function) are written.

    ARGUMENTS:
        - inputFile:    path of the audio file to be segmented
    '''
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    dirOutput = inputFile[0:-4] + "_musicSegments"
    outputExists = os.path.exists(dirOutput)
    if outputExists and dirOutput != ".":
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    # Only the mid-term step of the loaded model is used below.
    if modelType == 'svm':
        [_, _, _, classNames, _, mtStep, _, _, _] = aT.load_model(modelName)
    elif modelType == 'knn':
        [_, _, _, classNames, _, mtStep, _, _, _] = aT.load_model_knn(modelName)

    flagsInd, classNames, acc, CM = aS.mtFileClassification(
        inputFile, modelName, modelType, plotResults=False, gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    for idx, limits in enumerate(segs):
        isMusic = classNames[int(classes[idx])] == "Music"
        if isMusic and (limits[1] - limits[0] >= minDuration):
            prefix = dirOutput + os.sep
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(prefix, limits[0], limits[1])
            firstSample = int(Fs * limits[0])
            lastSample = int(Fs * limits[1])
            wavfile.write(strOut, Fs, x[firstSample:lastSample])
Esempio n. 4
0
def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    '''
    Classify every WAV file matching a folder (or path prefix) and aggregate
    the per-file results.

    ARGUMENTS:
        - inputFolder:    directory scanned for "*.wav" files; when it is not
                          an existing directory it is used as a raw path
                          prefix to which "*.wav" is appended
        - modelType:      'svm' or 'knn'
        - modelName:      path of the classification model file
        - outputMode:     when True, print the label of each file and the
                          class histogram, and show a bar plot of the
                          aggregated probabilities

    RETURNS:
        - classNames:     class names reported by the classifier
        - PsAll:          per-class probabilities, weighted by the duration
                          of each file and normalised to sum to 1 (left as
                          all zeros when the weighted sum is zero)
        None is returned, after printing a message, when no WAV file matches.

    RAISES:
        - Exception:      modelName is not an existing file
        - ValueError:     modelType is neither 'svm' nor 'knn'
    '''
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    if modelType == 'svm':
        modelParams = aT.load_model(modelName)
    elif modelType == 'knn':
        modelParams = aT.load_model_knn(modelName)
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            "Unknown modelType {0!r}: expected 'svm' or 'knn'".format(modelType))
    # Only the class names (4th element of the loaded model) are needed here;
    # the classification itself is delegated to aT.file_classification below.
    classNames = modelParams[3]

    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return None

    Results = []
    for wavFile in wavFilesList:
        # The file is read here only to weight its probabilities by duration.
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P,
         classNames] = aT.file_classification(wavFile, modelName, modelType)
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))

    if outputMode:
        # print distribution of classes:
        [Histogram, _] = numpy.histogram(numpy.array(Results),
                                         bins=numpy.arange(len(classNames) + 1))
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    # Normalise to a distribution; skip the division when nothing was
    # accumulated so the result is not filled with NaN.
    total = numpy.sum(PsAll)
    if total > 0:
        PsAll = PsAll / total

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.array(range(len(classNames) + 1)))
        ax.set_xticklabels([" "] + list(classNames))
        ax.bar(numpy.array(range(len(classNames))) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
def mtFileClassification(input_file,
                         model_name,
                         model_type,
                         plot_results=False,
                         gt_file=""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    ARGUMENTS:
        - input_file:        path of the input WAV file
        - model_name:        path of the classification model file
        - model_type:        "knn" loads the model with aT.load_model_knn,
                             any other value uses aT.load_model
        - plot_results:      True if results are to be plotted using
                             matplotlib along with a set of statistics
                             (its negation is forwarded to
                             plotSegmentationResults)
        - gt_file:           path of a ground-truth segment file; ignored
                             when it is not an existing file

    RETURNS:
        A tuple (flags_ind, class_names, acc, cm):
          - flags_ind:      numpy array holding the class index of each
                            mid-term window, after 1-window smoothing
          - class_names:    class names of the model, or the ground-truth
                            class names when acc >= 0
          - acc:            value returned by plotSegmentationResults
          - cm:             confusion matrix (row: ground truth, column:
                            prediction), or [] without a ground-truth file
        (-1, -1, -1, -1) is returned when the model file is missing, when
        the model uses beat features, or when the input file is unreadable.
    '''
    failure = (-1, -1, -1, -1)

    if not os.path.isfile(model_name):
        # It is the model *file* that is missing, not the model type.
        print("mtFileClassificationError: input model_name not found!")
        return failure

    # Load classifier:
    load_model = aT.load_model_knn if model_type == "knn" else aT.load_model
    [classifier, MEAN, STD, class_names, mt_win, mt_step, st_win, st_step,
     compute_beat] = load_model(model_name)

    if compute_beat:
        print("Model " + model_name + " contains long-term music features "
              "(beat etc) and cannot be used in "
              "segmentation")
        return failure

    [fs, x] = audioBasicIO.readAudioFile(input_file)  # load input file
    if fs == -1:  # could not read file
        return failure
    x = audioBasicIO.stereo2mono(x)  # convert stereo (if) to mono

    # mid-term feature extraction:
    [mt_feats, _, _] = aF.mtFeatureExtraction(x, fs, mt_win * fs, mt_step * fs,
                                              round(fs * st_win),
                                              round(fs * st_step))

    flags = []       # class name of each fix-sized segment
    flags_ind = []   # class index of each fix-sized segment
    for i in range(mt_feats.shape[1]):
        # normalize the current feature vector, then classify it
        cur_fv = (mt_feats[:, i] - MEAN) / STD
        [res, _] = aT.classifierWrapper(classifier, model_type, cur_fv)
        flags_ind.append(res)
        flags.append(class_names[int(res)])
    flags_ind = numpy.array(flags_ind)

    # 1-window smoothing: an isolated label between two equal neighbours
    # takes the neighbours' label
    for i in range(1, len(flags_ind) - 1):
        if flags_ind[i - 1] == flags_ind[i + 1]:
            flags_ind[i] = flags_ind[i + 1]

    # convert fix-sized flags to segments and classes
    # NOTE(review): segs/classes are not used by the rest of this function;
    # the two statements are kept so existing behaviour is unchanged.
    (segs, classes) = flags2segs(flags, mt_step)
    segs[-1] = len(x) / float(fs)

    # Load ground-truth:
    if os.path.isfile(gt_file):
        [seg_start_gt, seg_end_gt, seg_l_gt] = readSegmentGT(gt_file)
        flags_gt, class_names_gt = segs2flags(seg_start_gt, seg_end_gt,
                                              seg_l_gt, mt_step)
        flags_ind_gt = []
        for fl in flags_gt:
            # "align" labels with GT: express each ground-truth label as an
            # index into the model's class names (-1 when the model does not
            # know that class)
            name_gt = class_names_gt[fl]
            if name_gt in class_names:
                flags_ind_gt.append(class_names.index(name_gt))
            else:
                flags_ind_gt.append(-1)
        flags_ind_gt = numpy.array(flags_ind_gt)

        # NOTE(review): the matrix is sized by the ground-truth class list
        # while both indices refer to the model's class list - confirm the
        # two lists are expected to match.
        cm = numpy.zeros((len(class_names_gt), len(class_names_gt)))
        for i in range(min(flags_ind.shape[0], flags_ind_gt.shape[0])):
            gt_index = int(flags_ind_gt[i])
            if gt_index < 0:
                # A -1 index would wrap around and be counted in the last
                # row; unknown ground-truth classes are skipped instead.
                continue
            cm[gt_index, int(flags_ind[i])] += 1
    else:
        cm = []
        flags_ind_gt = numpy.array([])

    acc = plotSegmentationResults(flags_ind, flags_ind_gt, class_names,
                                  mt_step, not plot_results)
    if acc >= 0:
        # presumably acc is non-negative only when ground truth was loaded,
        # which is the only case where class_names_gt exists - TODO confirm
        print("Overall Accuracy: {0:.3f}".format(acc))
        return (flags_ind, class_names_gt, acc, cm)
    else:
        return (flags_ind, class_names, acc, cm)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function is used to record and analyze audio segments, in a fix window basis.
    Every completed segment is plotted and, when a classifier is loaded, its
    predicted class name is printed. All recorded samples are finally written
    to outputWavFile as 16-bit integers.

    The sampling rate `Fs` is not defined in this function; it is resolved
    from the enclosing scope.

    ARGUMENTS:
    - duration                  total recording duration
    - outputWavFile             path of the output WAV file
    - midTermBufferSizeSec      (fix)segment length in seconds
    - modelName                 classification model name
    - modelType                 classification model type ('svm' or 'knn';
                                any other value records without classifying)

    '''

    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, _, _, _, _, _] = aT.load_model(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, _, _, _, _, _] = aT.load_model_knn(modelName)
    else:
        Classifier = None

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)
    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    allData = []
    midTermBuffer = []
    curWindow = []

    while len(allData) < duration * Fs:
        # Read data from device
        numFrames, data = inp.read()
        if numFrames:
            for i in range(numFrames):
                curWindow.append(audioop.getsample(data, 2, i))
            # Move as many samples as fit into the mid-term buffer; the rest
            # stay in curWindow for the next segment.
            samplesToCopyToMidBuffer = min(len(curWindow),
                                           midTermBufferSize - len(midTermBuffer))
            midTermBuffer.extend(curWindow[0:samplesToCopyToMidBuffer])
            del curWindow[0:samplesToCopyToMidBuffer]
        if len(midTermBuffer) == midTermBufferSize:
            if Classifier is not None:
                # NOTE(review): window/step sizes are hard-coded here instead
                # of being taken from the loaded model - confirm intended.
                [mtFeatures, _, _] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0*Fs, 2.0*Fs, 0.020*Fs, 0.020*Fs)
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, P] = aT.classifierWrapper(Classifier, modelType, curFV)
                print(classNames[int(result)])
            allData.extend(midTermBuffer)

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            # start a fresh segment
            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)
Esempio n. 7
0
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec,
                       modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function is used to record and analyze audio segments, in a fix window basis.
    Every completed segment is plotted and, when a classifier is loaded, its
    predicted class name is printed. All recorded samples are finally written
    to outputWavFile as 16-bit integers.

    The sampling rate `Fs` is not defined in this function; it is resolved
    from the enclosing scope.

    ARGUMENTS:
    - duration                  total recording duration
    - outputWavFile             path of the output WAV file
    - midTermBufferSizeSec      (fix)segment length in seconds
    - modelName                 classification model name
    - modelType                 classification model type ('svm' or 'knn';
                                any other value records without classifying)

    '''

    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, _, _, _, _,
         _] = aT.load_model(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, _, _, _, _,
         _] = aT.load_model_knn(modelName)
    else:
        Classifier = None

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)
    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    allData = []
    midTermBuffer = []
    curWindow = []

    while len(allData) < duration * Fs:
        # Read data from device
        numFrames, data = inp.read()
        if numFrames:
            for i in range(numFrames):
                curWindow.append(audioop.getsample(data, 2, i))
            # Move as many samples as fit into the mid-term buffer; the rest
            # stay in curWindow for the next segment.
            samplesToCopyToMidBuffer = min(
                len(curWindow), midTermBufferSize - len(midTermBuffer))
            midTermBuffer.extend(curWindow[0:samplesToCopyToMidBuffer])
            del curWindow[0:samplesToCopyToMidBuffer]
        if len(midTermBuffer) == midTermBufferSize:
            if Classifier is not None:
                # NOTE(review): window/step sizes are hard-coded here instead
                # of being taken from the loaded model - confirm intended.
                [mtFeatures, _,
                 _] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0 * Fs,
                                             2.0 * Fs, 0.020 * Fs, 0.020 * Fs)
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, P] = aT.classifier_wrapper(Classifier, modelType,
                                                    curFV)
                print(classNames[int(result)])
            allData.extend(midTermBuffer)

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            # start a fresh segment
            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)
def init_classifier():
    """
    Load the model whose path is stored in classifier_info[0] with
    aT.load_model and publish its nine components as module-level globals.
    """
    global classifier, MEAN, STD, classNames
    global mt_win, mt_step, st_win, st_step, compute_beat

    model_path = classifier_info[0]
    (classifier, MEAN, STD, classNames,
     mt_win, mt_step, st_win, st_step,
     compute_beat) = aT.load_model(model_path)
Esempio n. 9
0
def mid_term_file_classification(input_file, model_name, model_type,
                                 plot_results=False, gt_file=""):
    """
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used,
    i.e. a pre-trained classifier.
    ARGUMENTS:
        - input_file:        path of the input WAV file
        - model_name:        path of the classification model file
        - model_type:        "knn" loads the model with at.load_model_knn,
                             any other value uses at.load_model
        - plot_results:      True if results are to be plotted using
                             matplotlib along with a set of statistics
                             (forwarded to load_ground_truth)
        - gt_file:           ground-truth file, forwarded to
                             load_ground_truth

    RETURNS:
          - labels:         numpy array with the predicted label of each
                            mid-term window (an empty list on early exit)
          - class_names:    class names of the loaded model (an empty list
                            when the model file is missing)
          - accuracy:       value returned by load_ground_truth (0.0 on
                            early exit)
          - cm:             value returned by load_ground_truth (an empty
                            array on early exit)
        The early exits are: model file missing, model using beat features,
        input file that could not be read.
    """
    labels = []
    accuracy = 0.0
    class_names = []
    cm = np.array([])
    if not os.path.isfile(model_name):
        # It is the model *file* that is missing, not the model type.
        print("mtFileClassificationError: input model_name not found!")
        return labels, class_names, accuracy, cm

    # Load classifier:
    load_model = at.load_model_knn if model_type == "knn" else at.load_model
    classifier, mean, std, class_names, mt_win, mid_step, st_win, \
        st_step, compute_beat = load_model(model_name)
    if compute_beat:
        print("Model " + model_name + " contains long-term music features "
                                      "(beat etc) and cannot be used in "
                                      "segmentation")
        return labels, class_names, accuracy, cm

    # load input file
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)

    # could not read file
    if sampling_rate == 0:
        return labels, class_names, accuracy, cm

    # convert stereo (if) to mono
    signal = audioBasicIO.stereo_to_mono(signal)

    # mid-term feature extraction:
    mt_feats, _, _ = \
        mtf.mid_feature_extraction(signal, sampling_rate,
                                   mt_win * sampling_rate,
                                   mid_step * sampling_rate,
                                   round(sampling_rate * st_win),
                                   round(sampling_rate * st_step))

    # for each feature vector (i.e. for each fix-sized segment):
    for col_index in range(mt_feats.shape[1]):
        # normalize current feature vector
        feature_vector = (mt_feats[:, col_index] - mean) / std

        # classify vector (the posterior is not needed here):
        label_predicted, _ = \
            at.classifier_wrapper(classifier, model_type, feature_vector)
        labels.append(label_predicted)
    labels = np.array(labels)

    # convert fix-sized flags to segments and classes
    # NOTE(review): segs/classes are not used by the rest of this function;
    # the two statements are kept so existing behaviour is unchanged.
    segs, classes = labels_to_segments(labels, mid_step)
    segs[-1] = len(signal) / float(sampling_rate)

    # Load ground-truth (only accuracy and confusion matrix are used):
    _, _, accuracy, cm = \
        load_ground_truth(gt_file, labels, class_names, mid_step, plot_results)

    return labels, class_names, accuracy, cm