def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False): if not os.path.isfile(modelName): raise Exception("Input modelName not found!") if modelType=='svm': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model(modelName) elif modelType=='knn': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model_knn(modelName) PsAll = numpy.zeros((len(classNames), )) files = "*.wav" if os.path.isdir(inputFolder): strFilePattern = os.path.join(inputFolder, files) else: strFilePattern = inputFolder + files wavFilesList = [] wavFilesList.extend(glob.glob(strFilePattern)) wavFilesList = sorted(wavFilesList) if len(wavFilesList)==0: print "No WAV files found!" return Results = [] for wavFile in wavFilesList: [Fs, x] = audioBasicIO.readAudioFile(wavFile) signalLength = x.shape[0] / float(Fs) [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType) PsAll += (numpy.array(P) * signalLength) Result = int(Result) Results.append(Result) if outputMode: print "{0:s}\t{1:s}".format(wavFile,classNames[Result]) Results = numpy.array(Results) # print distribution of classes: [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames)+1)) if outputMode: for i,h in enumerate(Histogram): print "{0:20s}\t\t{1:d}".format(classNames[i], h) PsAll = PsAll / numpy.sum(PsAll) if outputMode: fig = plt.figure() ax = fig.add_subplot(111) plt.title("Classes percentage " + inputFolder.replace('Segments','')) ax.axis((0, len(classNames)+1, 0, 1)) ax.set_xticks(numpy.array(range(len(classNames)+1))) ax.set_xticklabels([" "] + classNames) ax.bar(numpy.array(range(len(classNames)))+0.5, PsAll) plt.show() return classNames, PsAll
def getMusicSegmentsFromFile(inputFile): modelType = "svm" modelName = "data/svmMovies8classes" dirOutput = inputFile[0:-4] + "_musicSegments" if os.path.exists(dirOutput) and dirOutput != ".": shutil.rmtree(dirOutput) os.makedirs(dirOutput) [Fs, x] = audioBasicIO.readAudioFile(inputFile) if modelType == 'svm': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model(modelName) elif modelType == 'knn': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model_knn(modelName) flagsInd, classNames, acc, CM = aS.mtFileClassification(inputFile, modelName, modelType, plotResults=False, gtFile="") segs, classes = aS.flags2segs(flagsInd, mtStep) for i, s in enumerate(segs): if (classNames[int(classes[i])] == "Music") and (s[1] - s[0] >= minDuration): strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep, s[0], s[1]) wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
def getMusicSegmentsFromFile(inputFile): modelType = "svm" modelName = "data/svmMovies8classes" dirOutput = inputFile[0:-4] + "_musicSegments" if os.path.exists(dirOutput) and dirOutput!=".": shutil.rmtree(dirOutput) os.makedirs(dirOutput) [Fs, x] = audioBasicIO.readAudioFile(inputFile) if modelType=='svm': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model(modelName) elif modelType=='knn': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model_knn(modelName) flagsInd, classNames, acc, CM = aS.mtFileClassification(inputFile, modelName, modelType, plotResults = False, gtFile = "") segs, classes = aS.flags2segs(flagsInd, mtStep) for i, s in enumerate(segs): if (classNames[int(classes[i])] == "Music") and (s[1] - s[0] >= minDuration): strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput+os.sep, s[0], s[1]) wavfile.write( strOut, Fs, x[int(Fs*s[0]):int(Fs*s[1])])
def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False): if not os.path.isfile(modelName): raise Exception("Input modelName not found!") if modelType == 'svm': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model(modelName) elif modelType == 'knn': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model_knn(modelName) PsAll = numpy.zeros((len(classNames), )) files = "*.wav" if os.path.isdir(inputFolder): strFilePattern = os.path.join(inputFolder, files) else: strFilePattern = inputFolder + files wavFilesList = [] wavFilesList.extend(glob.glob(strFilePattern)) wavFilesList = sorted(wavFilesList) if len(wavFilesList) == 0: print "No WAV files found!" return Results = [] for wavFile in wavFilesList: [Fs, x] = audioBasicIO.readAudioFile(wavFile) signalLength = x.shape[0] / float(Fs) [Result, P, classNames] = aT.file_classification(wavFile, modelName, modelType) PsAll += (numpy.array(P) * signalLength) Result = int(Result) Results.append(Result) if outputMode: print "{0:s}\t{1:s}".format(wavFile, classNames[Result]) Results = numpy.array(Results) # print distribution of classes: [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames) + 1)) if outputMode: for i, h in enumerate(Histogram): print "{0:20s}\t\t{1:d}".format(classNames[i], h) PsAll = PsAll / numpy.sum(PsAll) if outputMode: fig = plt.figure() ax = fig.add_subplot(111) plt.title("Classes percentage " + inputFolder.replace('Segments', '')) ax.axis((0, len(classNames) + 1, 0, 1)) ax.set_xticks(numpy.array(range(len(classNames) + 1))) ax.set_xticklabels([" "] + classNames) ax.bar(numpy.array(range(len(classNames))) + 0.5, PsAll) plt.show() return classNames, PsAll
def mtFileClassification(input_file, model_name, model_type, plot_results=False, gt_file=""): ''' This function performs mid-term classification of an audio stream. Towards this end, supervised knowledge is used, i.e. a pre-trained classifier. ARGUMENTS: - input_file: path of the input WAV file - model_name: name of the classification model - model_type: svm or knn depending on the classifier type - plot_results: True if results are to be plotted using matplotlib along with a set of statistics RETURNS: - segs: a sequence of segment's endpoints: segs[i] is the endpoint of the i-th segment (in seconds) - classes: a sequence of class flags: class[i] is the class ID of the i-th segment ''' if not os.path.isfile(model_name): print("mtFileClassificationError: input model_type not found!") return (-1, -1, -1, -1) # Load classifier: if model_type == "knn": [classifier, MEAN, STD, class_names, mt_win, mt_step, st_win, st_step, compute_beat] = \ aT.load_model_knn(model_name) else: [ classifier, MEAN, STD, class_names, mt_win, mt_step, st_win, st_step, compute_beat ] = aT.load_model(model_name) if compute_beat: print("Model " + model_name + " contains long-term music features " "(beat etc) and cannot be used in " "segmentation") return (-1, -1, -1, -1) [fs, x] = audioBasicIO.readAudioFile(input_file) # load input file if fs == -1: # could not read file return (-1, -1, -1, -1) x = audioBasicIO.stereo2mono(x) # convert stereo (if) to mono duration = len(x) / fs # mid-term feature extraction: [mt_feats, _, _] = aF.mtFeatureExtraction(x, fs, mt_win * fs, mt_step * fs, round(fs * st_win), round(fs * st_step)) flags = [] Ps = [] flags_ind = [] for i in range( mt_feats.shape[1] ): # for each feature vector (i.e. for each fix-sized segment): cur_fv = (mt_feats[:, i] - MEAN) / STD # normalize current feature vector [res, P] = aT.classifierWrapper(classifier, model_type, cur_fv) # classify vector flags_ind.append(res) flags.append(class_names[int(res)]) # update class label matrix Ps.append(numpy.max(P)) # update probability matrix flags_ind = numpy.array(flags_ind) # 1-window smoothing for i in range(1, len(flags_ind) - 1): if flags_ind[i - 1] == flags_ind[i + 1]: flags_ind[i] = flags_ind[i + 1] # convert fix-sized flags to segments and classes (segs, classes) = flags2segs(flags, mt_step) segs[-1] = len(x) / float(fs) # Load grount-truth: if os.path.isfile(gt_file): [seg_start_gt, seg_end_gt, seg_l_gt] = readSegmentGT(gt_file) flags_gt, class_names_gt = segs2flags(seg_start_gt, seg_end_gt, seg_l_gt, mt_step) flags_ind_gt = [] for j, fl in enumerate(flags_gt): # "align" labels with GT if class_names_gt[flags_gt[j]] in class_names: flags_ind_gt.append( class_names.index(class_names_gt[flags_gt[j]])) else: flags_ind_gt.append(-1) flags_ind_gt = numpy.array(flags_ind_gt) cm = numpy.zeros((len(class_names_gt), len(class_names_gt))) for i in range(min(flags_ind.shape[0], flags_ind_gt.shape[0])): cm[int(flags_ind_gt[i]), int(flags_ind[i])] += 1 else: cm = [] flags_ind_gt = numpy.array([]) acc = plotSegmentationResults(flags_ind, flags_ind_gt, class_names, mt_step, not plot_results) if acc >= 0: print("Overall Accuracy: {0:.3f}".format(acc)) return (flags_ind, class_names_gt, acc, cm) else: return (flags_ind, class_names, acc, cm)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType): ''' recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType) This function is used to record and analyze audio segments, in a fix window basis. ARGUMENTS: - duration total recording duration - outputWavFile path of the output WAV file - midTermBufferSizeSec (fix)segment length in seconds - modelName classification model name - modelType classification model type ''' if modelType=='svm': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model(modelName) elif modelType=='knn': [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model_knn(modelName) else: Classifier = None inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK) inp.setchannels(1) inp.setrate(Fs) inp.setformat(alsaaudio.PCM_FORMAT_S16_LE) inp.setperiodsize(512) midTermBufferSize = int(midTermBufferSizeSec * Fs) allData = [] midTermBuffer = [] curWindow = [] count = 0 while len(allData)<duration*Fs: # Read data from device l,data = inp.read() if l: for i in range(l): curWindow.append(audioop.getsample(data, 2, i)) if (len(curWindow)+len(midTermBuffer)>midTermBufferSize): samplesToCopyToMidBuffer = midTermBufferSize - len(midTermBuffer) else: samplesToCopyToMidBuffer = len(curWindow) midTermBuffer = midTermBuffer + curWindow[0:samplesToCopyToMidBuffer]; del(curWindow[0:samplesToCopyToMidBuffer]) if len(midTermBuffer) == midTermBufferSize: count += 1 if Classifier!=None: [mtFeatures, stFeatures, _] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0*Fs, 2.0*Fs, 0.020*Fs, 0.020*Fs) curFV = (mtFeatures[:,0] - MEAN) / STD; [result, P] = aT.classifierWrapper(Classifier, modelType, curFV) print classNames[int(result)] allData = allData + midTermBuffer plt.clf() plt.plot(midTermBuffer) plt.show(block = False) plt.draw() midTermBuffer = [] allDataArray = numpy.int16(allData) wavfile.write(outputWavFile, Fs, allDataArray)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType): ''' recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType) This function is used to record and analyze audio segments, in a fix window basis. ARGUMENTS: - duration total recording duration - outputWavFile path of the output WAV file - midTermBufferSizeSec (fix)segment length in seconds - modelName classification model name - modelType classification model type ''' if modelType == 'svm': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model(modelName) elif modelType == 'knn': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat ] = aT.load_model_knn(modelName) else: Classifier = None inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK) inp.setchannels(1) inp.setrate(Fs) inp.setformat(alsaaudio.PCM_FORMAT_S16_LE) inp.setperiodsize(512) midTermBufferSize = int(midTermBufferSizeSec * Fs) allData = [] midTermBuffer = [] curWindow = [] count = 0 while len(allData) < duration * Fs: # Read data from device l, data = inp.read() if l: for i in range(l): curWindow.append(audioop.getsample(data, 2, i)) if (len(curWindow) + len(midTermBuffer) > midTermBufferSize): samplesToCopyToMidBuffer = midTermBufferSize - len( midTermBuffer) else: samplesToCopyToMidBuffer = len(curWindow) midTermBuffer = midTermBuffer + curWindow[ 0:samplesToCopyToMidBuffer] del (curWindow[0:samplesToCopyToMidBuffer]) if len(midTermBuffer) == midTermBufferSize: count += 1 if Classifier != None: [mtFeatures, stFeatures, _] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0 * Fs, 2.0 * Fs, 0.020 * Fs, 0.020 * Fs) curFV = (mtFeatures[:, 0] - MEAN) / STD [result, P] = aT.classifier_wrapper(Classifier, modelType, curFV) print classNames[int(result)] allData = allData + midTermBuffer plt.clf() plt.plot(midTermBuffer) plt.show(block=False) plt.draw() midTermBuffer = [] allDataArray = numpy.int16(allData) wavfile.write(outputWavFile, Fs, allDataArray)
def init_classifier(): global classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat [ classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat ] = aT.load_model(classifier_info[0])
def mid_term_file_classification(input_file, model_name, model_type, plot_results=False, gt_file=""): """ This function performs mid-term classification of an audio stream. Towards this end, supervised knowledge is used, i.e. a pre-trained classifier. ARGUMENTS: - input_file: path of the input WAV file - model_name: name of the classification model - model_type: svm or knn depending on the classifier type - plot_results: True if results are to be plotted using matplotlib along with a set of statistics RETURNS: - segs: a sequence of segment's endpoints: segs[i] is the endpoint of the i-th segment (in seconds) - classes: a sequence of class flags: class[i] is the class ID of the i-th segment """ labels = [] accuracy = 0.0 class_names = [] cm = np.array([]) if not os.path.isfile(model_name): print("mtFileClassificationError: input model_type not found!") return labels, class_names, accuracy, cm # Load classifier: if model_type == "knn": classifier, mean, std, class_names, mt_win, mid_step, st_win, \ st_step, compute_beat = at.load_model_knn(model_name) else: classifier, mean, std, class_names, mt_win, mid_step, st_win, \ st_step, compute_beat = at.load_model(model_name) if compute_beat: print("Model " + model_name + " contains long-term music features " "(beat etc) and cannot be used in " "segmentation") return labels, class_names, accuracy, cm # load input file sampling_rate, signal = audioBasicIO.read_audio_file(input_file) # could not read file if sampling_rate == 0: return labels, class_names, accuracy, cm # convert stereo (if) to mono signal = audioBasicIO.stereo_to_mono(signal) # mid-term feature extraction: mt_feats, _, _ = \ mtf.mid_feature_extraction(signal, sampling_rate, mt_win * sampling_rate, mid_step * sampling_rate, round(sampling_rate * st_win), round(sampling_rate * st_step)) posterior_matrix = [] # for each feature vector (i.e. for each fix-sized segment): for col_index in range(mt_feats.shape[1]): # normalize current feature v feature_vector = (mt_feats[:, col_index] - mean) / std # classify vector: label_predicted, posterior = \ at.classifier_wrapper(classifier, model_type, feature_vector) labels.append(label_predicted) # update probability matrix posterior_matrix.append(np.max(posterior)) labels = np.array(labels) # convert fix-sized flags to segments and classes segs, classes = labels_to_segments(labels, mid_step) segs[-1] = len(signal) / float(sampling_rate) # Load grount-truth: labels_gt, class_names_gt, accuracy, cm = \ load_ground_truth(gt_file, labels, class_names, mid_step, plot_results) return labels, class_names, accuracy, cm