def find_music(audio_file):
    """Locate the parts of ``audio_file`` that the SVM model labels 'music'.

    The file is classified with ``aS.mtFileClassification`` using the
    ``pyAA/data/svmSM`` model, the per-window labels are merged into
    segments with ``aS.flags2segs``, and every segment whose class is
    ``'music'`` is reported with some padding: it starts 0.5 s earlier
    (unless it is the first segment) and ends 2.5 s later (unless it is the
    last segment).

    ARGUMENTS:
        - audio_file:   path of the audio file to analyse

    RETURNS:
        - file_parts:   list of ``(start_ms, end_ms)`` integer tuples, one
                        per music segment, in segment order.  The list is
                        empty when the file yields one segment or none,
                        whatever the class of that segment.
    """
    modelName = "pyAA/data/svmSM"

    flagsInd, _, _, _ = aS.mtFileClassification(
        audio_file, modelName, "svm", False, '')

    # The model is loaded here only for its class names and mid-term step,
    # which flags2segs needs to turn window labels into timed segments.
    [_, _, _, classNames, _, mtStep, _, _, _] = aT.loadSVModel(modelName)

    flags = [classNames[int(f)] for f in flagsInd]
    (segs, classes) = aS.flags2segs(flags, mtStep)

    file_parts = []
    if len(classes) > 1:
        last = len(classes) - 1
        for i, c in enumerate(classes):
            if c != 'music':
                continue
            start = segs[i][0]
            end = segs[i][1]
            # Pad only where there is neighbouring audio to borrow from.
            if i != 0:
                start -= 0.5
            if i != last:
                end += 2.5
            file_parts.append((int(start * 1000), int(end * 1000)))
    return file_parts
def initSubscriber():
    """Start the classifier node: load the SVM model, wire the topics, spin.

    Steps, in order:
      * initialise the ROS node ``audio_features_classifier_node``;
      * read the private parameters ``~classifier_name``,
        ``~features_topic`` and ``~classification_topic``;
      * load the SVM model ``classifier_data/<classifier_name>`` located
        next to the running script (``sys.argv[0]``);
      * store every loaded field in the module-level ``classifierInfo``
        mapping (it must already exist; it is only filled in here);
      * create the classification publisher and the features subscriber
        (``featuresCallback`` handles incoming messages);
      * hand control to ``rospy.spin()``.

    Module globals assigned: ``modelName``, ``classification_publisher``,
    ``features_subscriber``.
    """
    global features_subscriber
    global modelName
    global classifierInfo
    global classification_publisher

    rospy.init_node("audio_features_classifier_node")

    modelName = rospy.get_param('~classifier_name', 'modelSVM')
    sub_topic = rospy.get_param('~features_topic', '/audio_features_extraction/features')
    pub_topic = rospy.get_param('~classification_topic', '~audio_classification')

    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
     computeBEAT] = audioTrainTest.loadSVModel(script_dir + '/classifier_data/' + modelName)

    loaded_fields = (
        ("Classifier", Classifier),
        ("MEAN", MEAN),
        ("STD", STD),
        ("classNames", classNames),
        ("mtWin", mtWin),
        ("mtStep", mtStep),
        ("stWin", stWin),
        ("stStep", stStep),
        ("computeBEAT", computeBEAT),
    )
    for key, value in loaded_fields:
        classifierInfo[key] = value

    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(MEAN)

    # NOTE(review): the default pub_topic already starts with '~' and another
    # '~' is prepended here, giving "~~audio_classification" - confirm that
    # the resulting topic name is the intended one.
    classification_publisher = rospy.Publisher("~" + pub_topic, classificationResult, queue_size=10)
    features_subscriber = rospy.Subscriber(sub_topic, featMsg, featuresCallback)

    print("Waiting for features_topic to be published...")
    rospy.spin()
def __init__(self, model_path=None):
    """Load the SVM model this instance works with.

    ``self.model_type`` is read before anything is assigned here, so it is
    expected to be available already (e.g. as a class attribute) and must
    equal ``'svm'``.

    ARGUMENTS:
        - model_path:   path handed to ``aT.loadSVModel``.  When it is
                        None or empty, ``<home>/.speechworker/models/svmSM``
                        is used, where ``<home>`` is ``$HOME`` if set and
                        the expansion of ``~`` otherwise.

    Attributes set from the loaded model: ``classifier``, ``model_mean``,
    ``model_sd``, ``class_names``, ``mt_win``, ``mt_step``, ``st_win`` and
    ``st_step``.  The ninth value returned by the loader is discarded.
    """
    assert (self.model_type == 'svm')

    if not model_path:
        # $HOME keeps priority so existing setups resolve to the same path;
        # expanduser only covers environments where HOME is not defined,
        # which previously failed with KeyError.
        if 'HOME' in os.environ:
            home_dir = os.environ['HOME']
        else:
            home_dir = os.path.expanduser('~')
        ppath = os.path.join(home_dir, '.speechworker')
        model_path = os.path.join(ppath, 'models/svmSM')

    [
        self.classifier,
        self.model_mean,
        self.model_sd,
        self.class_names,
        self.mt_win,
        self.mt_step,
        self.st_win,
        self.st_step,
        _,
    ] = aT.loadSVModel(model_path)
def getMusicSegmentsFromFile(inputFile):
    """Write every sufficiently long "Music" segment of a file to its own WAV.

    The input is classified with the ``data/svmMovies8classes`` SVM model.
    Each resulting segment whose class name is ``"Music"`` and whose length
    is at least ``minDuration`` seconds is saved as
    ``<start>-<end>.wav`` (seconds, three decimals) inside the directory
    ``<inputFile without extension>_musicSegments``.

    That output directory is deleted first if it already exists, then
    re-created, so results of an earlier run are lost.

    ``minDuration`` is not defined in this function; it is expected to be
    available at module level.

    ARGUMENTS:
        - inputFile:    path of the audio file to process
    """
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    # splitext copes with extensions of any length (or none); slicing off
    # the last four characters was only right for three-letter extensions.
    dirOutput = os.path.splitext(inputFile)[0] + "_musicSegments"
    if os.path.exists(dirOutput) and dirOutput != ".":
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    # modelType is fixed to "svm" above, so only the SVM loader is needed.
    # The model is loaded here for its mid-term step (used by flags2segs).
    [_, _, _, _, _, mtStep, _, _, _] = aT.loadSVModel(modelName)

    flagsInd, classNames, acc, CM = aS.mtFileClassification(
        inputFile, modelName, modelType, plotResults=False, gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    for i, s in enumerate(segs):
        isMusic = classNames[int(classes[i])] == "Music"
        if isMusic and (s[1] - s[0] >= minDuration):
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep, s[0], s[1])
            # Segment limits are in seconds; convert to sample indices.
            wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
def mtFileClassification(inputFile, modelName, modelType, plotResults=False,
                         gtFile=""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    ARGUMENTS:
        - inputFile:        path of the input WAV file
        - modelName:        path of the classification model file
        - modelType:        one of svm, svm_rbf, knn, randomforest,
                            gradientboosting, extratrees
        - plotResults:      passed, negated, as the last argument of
                            plotSegmentationResults
        - gtFile:           path of a ground-truth segment file; ignored
                            when it does not point to an existing file
    RETURNS:
        A 4-tuple (flagsInd, classNames, acc, CM):
        - flagsInd:         numpy array with one class index per mid-term
                            window, after 1-window smoothing
        - classNames:       class names of the model, or the ground-truth
                            class names when acc >= 0
        - acc:              value returned by plotSegmentationResults
        - CM:               confusion matrix (rows: ground truth, columns:
                            prediction) when gtFile exists, otherwise []
        On failure (model file missing, unsupported modelType, model that
        uses beat features, unreadable input) the tuple is (-1, -1, -1, -1).
    '''
    if not os.path.isfile(modelName):
        print("mtFileClassificationError: input modelName not found!")
        return (-1, -1, -1, -1)

    # Loader looked up by name so that only the requested one has to exist
    # in aT.
    loaderNames = {
        'svm': 'loadSVModel',
        'svm_rbf': 'loadSVModel',
        'knn': 'loadKNNModel',
        'randomforest': 'loadRandomForestModel',
        'gradientboosting': 'loadGradientBoostingModel',
        'extratrees': 'loadExtraTreesModel',
    }
    if modelType not in loaderNames:
        # Previously this fell through and crashed on an unbound variable.
        print("mtFileClassificationError: unsupported modelType!")
        return (-1, -1, -1, -1)

    # Load classifier:
    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
     computeBEAT] = getattr(aT, loaderNames[modelType])(modelName)

    if computeBEAT:
        print("Model " + modelName + " contains long-term music features (beat etc) and cannot be used in segmentation")
        return (-1, -1, -1, -1)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)        # load input file
    if Fs == -1:                                           # could not read file
        return (-1, -1, -1, -1)
    x = audioBasicIO.stereo2mono(x)                        # convert stereo (if) to mono

    # mid-term feature extraction:
    [MidTermFeatures, _] = aF.mtFeatureExtraction(
        x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep))

    # for each feature vector (i.e. for each fix-sized segment):
    flagsInd = []
    for i in range(MidTermFeatures.shape[1]):
        # normalize current feature vector
        curFV = (MidTermFeatures[:, i] - MEAN) / STD
        [Result, _] = aT.classifierWrapper(Classifier, modelType, curFV)
        flagsInd.append(Result)
    flagsInd = numpy.array(flagsInd)

    # 1-window smoothing: a window whose two neighbours agree takes their
    # label.  Done in place, so earlier corrections affect later windows.
    for i in range(1, len(flagsInd) - 1):
        if flagsInd[i - 1] == flagsInd[i + 1]:
            flagsInd[i] = flagsInd[i + 1]

    # Load ground-truth:
    if os.path.isfile(gtFile):
        [segStartGT, segEndGT, segLabelsGT] = readSegmentGT(gtFile)
        flagsGT, classNamesGT = segs2flags(
            segStartGT, segEndGT, segLabelsGT, mtStep)

        # "align" labels with GT: express every ground-truth label as an
        # index into classNames, or -1 when the model has no such class
        flagsIndGT = []
        for fl in flagsGT:
            nameGT = classNamesGT[fl]
            if nameGT in classNames:
                flagsIndGT.append(classNames.index(nameGT))
            else:
                flagsIndGT.append(-1)
        flagsIndGT = numpy.array(flagsIndGT)

        # NOTE(review): both axes are indexed with positions in classNames
        # while the matrix is sized by len(classNamesGT) - confirm the two
        # lists always have the same length.
        CM = numpy.zeros((len(classNamesGT), len(classNamesGT)))
        for i in range(min(flagsInd.shape[0], flagsIndGT.shape[0])):
            if flagsIndGT[i] < 0:
                # No matching model class: a -1 index would silently be
                # counted in the last row, so the window is left out.
                continue
            CM[int(flagsIndGT[i]), int(flagsInd[i])] += 1
    else:
        CM = []
        flagsIndGT = numpy.array([])

    acc = plotSegmentationResults(
        flagsInd, flagsIndGT, classNames, mtStep, not plotResults)

    if acc >= 0:
        print("Overall Accuracy: {0:.3f}".format(acc))
        # NOTE(review): flagsInd holds indices into classNames, yet the
        # ground-truth names are returned on this path - confirm callers
        # expect that.
        return (flagsInd, classNamesGT, acc, CM)
    else:
        return (flagsInd, classNames, acc, CM)
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function is used to record and analyze audio segments, in a fix window basis.

    The sampling rate Fs is not a parameter; it is read from module scope.
    Captured samples are grouped into buffers of midTermBufferSizeSec
    seconds. Each full buffer is classified (when a model is loaded) and
    the class name printed, the buffer is plotted, and its samples are
    appended to the recording. When at least duration * Fs samples have
    been collected, the recording is written to outputWavFile as 16-bit
    samples.

    ARGUMENTS:
    - duration              total recording duration
    - outputWavFile         path of the output WAV file
    - midTermBufferSizeSec  (fix)segment length in seconds
    - modelName             classification model name
    - modelType             classification model type: 'svm' or 'knn';
                            any other value records without classifying
    '''
    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = aT.loadKNNModel(modelName)
    else:
        Classifier = None

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)

    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    allData = []
    midTermBuffer = []
    curWindow = []

    while len(allData) < duration * Fs:
        # Read data from device
        nFrames, data = inp.read()
        if nFrames:
            # one 2-byte sample per frame (mono capture)
            curWindow.extend(audioop.getsample(data, 2, i) for i in range(nFrames))

            # Move as many samples as still fit in the mid-term buffer;
            # the remainder stays in curWindow for the next buffer.
            freeSlots = midTermBufferSize - len(midTermBuffer)
            samplesToCopyToMidBuffer = min(len(curWindow), freeSlots)
            midTermBuffer.extend(curWindow[0:samplesToCopyToMidBuffer])
            del curWindow[0:samplesToCopyToMidBuffer]

        if len(midTermBuffer) == midTermBufferSize:
            if Classifier is not None:
                [mtFeatures, _] = aF.mtFeatureExtraction(
                    midTermBuffer, Fs, 2.0 * Fs, 2.0 * Fs, 0.020 * Fs, 0.020 * Fs)
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, _] = aT.classifierWrapper(Classifier, modelType, curFV)
                print(classNames[int(result)])

            # extend() appends in place instead of copying the whole
            # recording every time a buffer completes
            allData.extend(midTermBuffer)

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)
def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    """Classify every WAV file of a folder and aggregate the class weights.

    Each ``*.wav`` file (sorted by path) is classified with
    ``aT.fileClassification``.  The returned class probabilities are
    weighted by the file's length in seconds, summed over all files and
    normalised so that they add up to 1.

    ARGUMENTS:
        - inputFolder:  directory holding the WAV files; when it is not an
                        existing directory it is used as a path prefix
                        (``inputFolder + "*.wav"``)
        - modelType:    'svm' or 'knn'
        - modelName:    path of the model file
        - outputMode:   when True, print the class of each file and the
                        per-class file counts, and show a bar chart of the
                        aggregated weights

    RETURNS:
        - (classNames, PsAll): the class names and the normalised
          duration-weighted probabilities; ``None`` when no WAV file is
          found.

    RAISES:
        - Exception:    modelName is not an existing file
        - ValueError:   modelType is neither 'svm' nor 'knn'
    """
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    if modelType == 'svm':
        loadedModel = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        loadedModel = aT.loadKNNModel(modelName)
    else:
        # Fail early and clearly; this used to surface later as an unbound
        # local variable.
        raise ValueError("Unsupported modelType: {0!r}".format(modelType))

    # Only the class list is needed here, to size the accumulator.
    [_, _, _, classNames, _, _, _, _, _] = loadedModel
    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = sorted(glob.glob(strFilePattern))
    if not wavFilesList:
        print("No WAV files found!")
        return

    Results = []
    for wavFile in wavFilesList:
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)      # duration in seconds
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))
    Results = numpy.array(Results)

    # print distribution of classes:
    [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames) + 1))
    if outputMode:
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))

    PsAll = PsAll / numpy.sum(PsAll)

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.arange(len(classNames) + 1))
        ax.set_xticklabels([" "] + classNames)
        ax.bar(numpy.arange(len(classNames)) + 0.5, PsAll)
        plt.show()

    return classNames, PsAll