Example #1
import time

import sox
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS
from pyAudioAnalysis import audioTrainTest as aT


def find_music(audio_file):
    modelName = "pyAA/data/svmSM"

    [Fs, x] = aIO.readAudioFile(audio_file)
    duration = x.shape[0] / float(Fs)
    t1 = time.time()  # time.clock() was removed in Python 3.8
    flagsInd, classNames, acc, CMt = aS.mtFileClassification(
        audio_file, modelName, "svm", False, '')
    [
        Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
        computeBEAT
    ] = aT.loadSVModel(modelName)
    t2 = time.time()
    perTime1 = duration / (t2 - t1)  # processing speed relative to real time
    flags = [classNames[int(f)] for f in flagsInd]
    (segs, classes) = aS.flags2segs(flags, mtStep)

    file_parts = []

    cbn = sox.Combiner()  # created but never used in this snippet
    if len(classes) > 1:
        for i, c in enumerate(classes):
            if c == 'music':
                # pad the segment boundaries, except at the start and end of file
                start = segs[i][0]
                if i != 0:
                    start -= 0.5
                end = segs[i][1]
                if i != len(classes) - 1:
                    end += 2.5
                file_parts.append((int(start * 1000), int(end * 1000)))

    return file_parts
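
The millisecond (start, end) pairs returned by find_music match pydub's slicing convention, so a caller could cut the detected music out directly. A minimal sketch, assuming pydub is installed; the file names are hypothetical:

from pydub import AudioSegment

parts = find_music("movie_audio.wav")  # e.g. [(12000, 45500), ...]
audio = AudioSegment.from_wav("movie_audio.wav")
for k, (start_ms, end_ms) in enumerate(parts):
    # AudioSegment slicing is in milliseconds, matching find_music's output
    audio[start_ms:end_ms].export("music_part_%d.wav" % k, format="wav")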
Example #2
import os
import sys

import rospy
from pyAudioAnalysis import audioTrainTest

# featMsg, classificationResult and featuresCallback are assumed to be
# defined elsewhere in the enclosing ROS package.


def initSubscriber():
    global features_subscriber
    global modelName
    global classifierInfo
    global classification_publisher

    rospy.init_node("audio_features_classifier_node")
    modelName = rospy.get_param('~classifier_name', 'modelSVM')
    sub_topic = rospy.get_param('~features_topic',
                                '/audio_features_extraction/features')
    pub_topic = rospy.get_param('~classification_topic', '~audio_classification')
    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
     computeBEAT] = audioTrainTest.loadSVModel(
         os.path.dirname(os.path.realpath(sys.argv[0])) +
         '/classifier_data/' + modelName)
    classifierInfo["Classifier"] = Classifier
    classifierInfo["MEAN"] = MEAN
    classifierInfo["STD"] = STD
    classifierInfo["classNames"] = classNames
    classifierInfo["mtWin"] = mtWin
    classifierInfo["mtStep"] = mtStep
    classifierInfo["stWin"] = stWin
    classifierInfo["stStep"] = stStep
    classifierInfo["computeBEAT"] = computeBEAT
    print(MEAN)
    # Note: "~" is prepended here as well, so the default parameter value
    # above ("~audio_classification") yields a doubly-prefixed topic name.
    classification_publisher = rospy.Publisher("~" + pub_topic,
                                               classificationResult,
                                               queue_size=10)
    features_subscriber = rospy.Subscriber(sub_topic, featMsg, featuresCallback)
    print("Waiting for features_topic to be published...")
    rospy.spin()
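
For reference, a hedged sketch of what featuresCallback might look like: it normalizes the received feature vector with the loaded model's statistics and classifies it via audioTrainTest.classifierWrapper, mirroring the other examples in this collection. The featMsg and classificationResult field names are assumptions, since they live in the enclosing ROS package.

import numpy


def featuresCallback(msg):
    # msg.features is an assumed field carrying the mid-term feature vector
    curFV = (numpy.array(msg.features) -
             classifierInfo["MEAN"]) / classifierInfo["STD"]
    [result, P] = audioTrainTest.classifierWrapper(
        classifierInfo["Classifier"], "svm", curFV)
    out = classificationResult()  # field names below are assumptions
    out.class_name = classifierInfo["classNames"][int(result)]
    out.probability = float(numpy.max(P))
    classification_publisher.publish(out)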
Example #3
    def __init__(self, model_path=None):
        # Part of a classifier-wrapper class; model_type is assumed to be a
        # class attribute, and only SVM models are supported here.
        assert self.model_type == 'svm'

        if not model_path:
            ppath = os.path.join(os.environ['HOME'], '.speechworker')
            model_path = os.path.join(ppath, 'models/svmSM')

        [
            self.classifier, self.model_mean, self.model_sd, self.class_names,
            self.mt_win, self.mt_step, self.st_win, self.st_step, _
        ] = aT.loadSVModel(model_path)
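
Usage would look roughly like the following; the class name is hypothetical (the snippet only shows __init__), and model_type is assumed to be set as a class attribute:

clf = SpeechWorkerClassifier()  # hypothetical class name
print(clf.class_names)          # e.g. ['music', 'speech']
print(clf.mt_win, clf.mt_step)  # mid-term window and step, in seconds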
Example #4
import os
import shutil

from scipy.io import wavfile
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioSegmentation as aS
from pyAudioAnalysis import audioTrainTest as aT

# minDuration (minimum segment length in seconds) is assumed to be defined
# at module level.


def getMusicSegmentsFromFile(inputFile):
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    # the output directory is named after the input file (".wav" stripped)
    dirOutput = inputFile[0:-4] + "_musicSegments"

    if os.path.exists(dirOutput) and dirOutput != ".":
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    if modelType == 'svm':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadKNNModel(modelName)

    flagsInd, classNames, acc, CM = aS.mtFileClassification(inputFile,
                                                            modelName,
                                                            modelType,
                                                            plotResults=False,
                                                            gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    # keep only sufficiently long segments classified as "Music"
    for i, s in enumerate(segs):
        if (classNames[int(classes[i])]
                == "Music") and (s[1] - s[0] >= minDuration):
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep,
                                                       s[0], s[1])
            wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
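
A hedged usage sketch; the input file name and the minDuration value are assumptions:

minDuration = 10.0  # assumed module-level threshold, in seconds
getMusicSegmentsFromFile("movie.wav")
# writes segments such as movie_musicSegments/12.000-45.500.wav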
Example #5
import os

import numpy
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction as aF
from pyAudioAnalysis import audioTrainTest as aT

# flags2segs, segs2flags, readSegmentGT and plotSegmentationResults are
# helpers defined in the same module (pyAudioAnalysis.audioSegmentation).


def mtFileClassification(inputFile, modelName, modelType, plotResults=False, gtFile=""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    ARGUMENTS:
        - inputFile:        path of the input WAV file
        - modelName:        name of the classification model
        - modelType:        svm or knn depending on the classifier type
        - plotResults:      True if results are to be plotted using matplotlib along with a set of statistics
        - gtFile:           path of a ground-truth segmentation file (optional)

    RETURNS:
        - flagsInd:         a sequence of class indices, one per mid-term window
        - classNames:       the list of class names
        - acc:              overall accuracy (-1 if no ground truth is provided)
        - CM:               the confusion matrix ([] if no ground truth is provided)
    '''

    if not os.path.isfile(modelName):
        print("mtFileClassificationError: input modelName not found!")
        return (-1, -1, -1, -1)
    # Load classifier:
    if (modelType == 'svm') or (modelType == 'svm_rbf'):
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT] = aT.loadKNNModel(modelName)
    elif modelType == 'randomforest':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT] = aT.loadRandomForestModel(modelName)
    elif modelType == 'gradientboosting':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT] = aT.loadGradientBoostingModel(modelName)
    elif modelType == 'extratrees':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin,
            stStep, computeBEAT] = aT.loadExtraTreesModel(modelName)
    else:
        # unknown model type: bail out instead of failing later with a NameError
        print("mtFileClassificationError: unknown model type!")
        return (-1, -1, -1, -1)

    if computeBEAT:
        print("Model " + modelName +
              " contains long-term music features (beat etc) and cannot be used in segmentation")
        return (-1, -1, -1, -1)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)        # load input file
    if Fs == -1:                                           # could not read file
        return (-1, -1, -1, -1)
    # convert stereo (if) to mono
    x = audioBasicIO.stereo2mono(x)
    Duration = len(x) / Fs
    # mid-term feature extraction:
    [MidTermFeatures, _] = aF.mtFeatureExtraction(
        x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep))
    flags = []
    Ps = []
    flagsInd = []
    # for each feature vector (i.e. for each fixed-size segment):
    for i in range(MidTermFeatures.shape[1]):
        # normalize current feature vector
        curFV = (MidTermFeatures[:, i] - MEAN) / STD
        [Result, P] = aT.classifierWrapper(
            Classifier, modelType, curFV)    # classify vector
        flagsInd.append(Result)
        # update class label matrix
        flags.append(classNames[int(Result)])
        # update probability matrix
        Ps.append(numpy.max(P))
    flagsInd = numpy.array(flagsInd)

    # 1-window smoothing
    for i in range(1, len(flagsInd) - 1):
        if flagsInd[i - 1] == flagsInd[i + 1]:
            flagsInd[i] = flagsInd[i + 1]
    # convert fixed-size flags to segments and classes
    (segs, classes) = flags2segs(flags, mtStep)
    segs[-1] = len(x) / float(Fs)

    # Load ground-truth:
    if os.path.isfile(gtFile):
        [segStartGT, segEndGT, segLabelsGT] = readSegmentGT(gtFile)
        flagsGT, classNamesGT = segs2flags(
            segStartGT, segEndGT, segLabelsGT, mtStep)
        flagsIndGT = []
        for j, fl in enumerate(flagsGT):                    # "align" labels with GT
            if classNamesGT[flagsGT[j]] in classNames:
                flagsIndGT.append(classNames.index(classNamesGT[flagsGT[j]]))
            else:
                flagsIndGT.append(-1)
        flagsIndGT = numpy.array(flagsIndGT)
        CM = numpy.zeros((len(classNamesGT), len(classNamesGT)))
        for i in range(min(flagsInd.shape[0], flagsIndGT.shape[0])):
            CM[int(flagsIndGT[i]), int(flagsInd[i])] += 1
    else:
        CM = []
        flagsIndGT = numpy.array([])
    acc = plotSegmentationResults(
        flagsInd, flagsIndGT, classNames, mtStep, not plotResults)
    if acc >= 0:
        print("Overall Accuracy: {0:.3f}".format(acc))
        return (flagsInd, classNamesGT, acc, CM)
    else:
        return (flagsInd, classNames, acc, CM)
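
A minimal usage sketch, assuming a pre-trained speech/music SVM at the hypothetical path "data/svmSM":

flagsInd, classNames, acc, CM = mtFileClassification(
    "sample.wav", "data/svmSM", "svm", plotResults=False)
print([classNames[int(f)] for f in flagsInd])  # one label per mid-term window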
Example #6
import audioop

import alsaaudio
import numpy
import matplotlib.pyplot as plt
from scipy.io import wavfile
from pyAudioAnalysis import audioFeatureExtraction as aF
from pyAudioAnalysis import audioTrainTest as aT

# Fs (the recording sample rate, e.g. 16000) is assumed to be defined at
# module level.


def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec,
                       modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function records and analyzes audio segments on a fixed-window basis.

    ARGUMENTS:
    - duration:               total recording duration (seconds)
    - outputWavFile:          path of the output WAV file
    - midTermBufferSizeSec:   (fixed) segment length in seconds
    - modelName:              classification model name
    - modelType:              classification model type
    '''

    if modelType == 'svm':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadKNNModel(modelName)
    else:
        Classifier = None

    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)
    inp.setrate(Fs)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(512)
    midTermBufferSize = int(midTermBufferSizeSec * Fs)
    allData = []
    midTermBuffer = []
    curWindow = []
    count = 0

    while len(allData) < duration * Fs:
        # Read data from device
        l, data = inp.read()
        if l:
            for i in range(l):
                curWindow.append(audioop.getsample(data, 2, i))
            if (len(curWindow) + len(midTermBuffer) > midTermBufferSize):
                samplesToCopyToMidBuffer = midTermBufferSize - \
                    len(midTermBuffer)
            else:
                samplesToCopyToMidBuffer = len(curWindow)
            midTermBuffer = midTermBuffer + \
                curWindow[0:samplesToCopyToMidBuffer]
            del curWindow[0:samplesToCopyToMidBuffer]
        if len(midTermBuffer) == midTermBufferSize:
            count += 1
            if Classifier is not None:
                [mtFeatures,
                 stFeatures] = aF.mtFeatureExtraction(midTermBuffer, Fs,
                                                      2.0 * Fs, 2.0 * Fs,
                                                      0.020 * Fs, 0.020 * Fs)
                curFV = (mtFeatures[:, 0] - MEAN) / STD
                [result, P] = aT.classifierWrapper(Classifier, modelType,
                                                   curFV)
                print(classNames[int(result)])
            allData = allData + midTermBuffer

            plt.clf()
            plt.plot(midTermBuffer)
            plt.show(block=False)
            plt.draw()

            midTermBuffer = []

    allDataArray = numpy.int16(allData)
    wavfile.write(outputWavFile, Fs, allDataArray)
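
A minimal invocation sketch; Fs must be set at module level before calling, and the paths and values here are assumptions:

Fs = 16000  # assumed sample rate for the ALSA capture above
recordAnalyzeAudio(duration=30, outputWavFile="recorded.wav",
                   midTermBufferSizeSec=2.0,
                   modelName="data/svmSM", modelType="svm")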
Example #7
import os
import glob

import numpy
import matplotlib.pyplot as plt
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioTrainTest as aT


def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    if modelType == 'svm':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [
            Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
            computeBEAT
        ] = aT.loadKNNModel(modelName)
    else:
        raise Exception("Unknown model type: " + modelType)

    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = []
    wavFilesList.extend(glob.glob(strFilePattern))
    wavFilesList = sorted(wavFilesList)
    if len(wavFilesList) == 0:
        print("No WAV files found!")
        return

    Results = []
    for wavFile in wavFilesList:
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        [Result, P,
         classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))
    Results = numpy.array(Results)

    # print distribution of classes:
    [Histogram, _] = numpy.histogram(Results,
                                     bins=numpy.arange(len(classNames) + 1))
    if outputMode:
        for i, h in enumerate(Histogram):
            print("{0:20s}\t\t{1:d}".format(classNames[i], h))
    PsAll = PsAll / numpy.sum(PsAll)

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.array(range(len(classNames) + 1)))
        ax.set_xticklabels([" "] + classNames)
        ax.bar(numpy.array(range(len(classNames))) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
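
A short usage sketch, assuming a folder of WAV files and a hypothetical model path; PsAll is the duration-weighted class distribution accumulated above:

classNames, PsAll = classifyFolderWrapper("recordings/", "svm",
                                          "data/svmSM", outputMode=True)
for name, p in zip(classNames, PsAll):
    print("{0:20s}\t{1:.3f}".format(name, p))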