Esempio n. 1
0
def classifyNN(inputFile, modelName):
    """Classify one audio file with the model stored under ``modelName``.

    The model bundle returned by ``loadModel`` supplies the classifier, the
    normalization vectors, the class names, the mid-/short-term window
    settings and a flag telling whether beat features are appended.

    Returns ``(Result, P, classNames)`` where ``Result`` and ``P`` are
    whatever ``classify`` produces, or ``(-1, -1, -1)`` when the mono signal
    is an ``int`` (presumably an I/O failure marker -- TODO confirm) or when
    the recording is not longer than one mid-term window.
    """
    (network, feat_mean, feat_std, class_names, mt_win, mt_step, st_win,
     st_step, use_beat) = loadModel(modelName)

    sample_rate, signal = audioBasicIO.readAudioFile(inputFile)
    signal = audioBasicIO.stereo2mono(signal)

    # Bail out on unreadable input or on audio too short to analyse.
    if isinstance(signal, int) or \
            signal.shape[0] / float(sample_rate) <= mt_win:
        return (-1, -1, -1)

    mt_stats, st_feats = aF.mtFeatureExtraction(
        signal, sample_rate,
        mt_win * sample_rate, mt_step * sample_rate,
        round(sample_rate * st_win), round(sample_rate * st_step))

    # Collapse the mid-term statistics into one long-term average vector.
    feature_vec = mt_stats.mean(axis=1)
    if use_beat:
        bpm, bpm_conf = aF.beatExtraction(st_feats, st_step)
        feature_vec = numpy.append(feature_vec, bpm)
        feature_vec = numpy.append(feature_vec, bpm_conf)

    # Normalize with the statistics stored alongside the model.
    normalized = (feature_vec - feat_mean) / feat_std

    label, probabilities = classify(network, normalized)
    return label, probabilities, class_names
Esempio n. 2
0
def feature_combined(filename, smooth_window_size=0):
    """Build three alternative feature vectors for one WAV file.

    Args:
        filename: path handed to ``wavfile.read``.
        smooth_window_size: when non-zero, the FFT data is passed through
            ``run_avg`` with this window size before note intensities are
            computed.

    Returns:
        ``[method_1, method_2, method_3]`` where

        * ``method_1`` is the mean over axis 0 of ``external_feature_vects``,
        * ``method_2`` is the total average note intensity, followed by 12
          sums of every 12th note intensity, followed by the BPM,
        * ``method_3`` is the 97 per-note maximum intensities followed by
          the BPM.
    """
    rate, wav_data = wavfile.read(filename)

    # Method 1: average of the externally computed feature vectors.
    # (The original also ran beatExtraction on these vectors, but its result
    # was overwritten by get_beat() before ever being used, so that dead call
    # has been dropped.)
    external_vect = external_feature_vects(wav_data, rate)
    method_1 = np.mean(external_vect, axis=0)

    FFT_data = get_FFT_data(wav_data, rate)
    if smooth_window_size != 0:
        FFT_data = run_avg(FFT_data, smooth_window_size)
    BPM = get_beat(wav_data, rate)
    x_vals = FFT_indices_to_hz(FFT_data, rate)

    # Method 3: one maximum intensity per note index 0..96, plus the BPM.
    note_intensities = [
        max_note_intensity(x_vals, FFT_data, i) for i in range(97)
    ]
    method_3 = np.append(note_intensities, BPM)

    # Method 2: fold the note intensities with a stride of 12 (presumably
    # the same pitch across 8 octaves -- TODO confirm) and prepend the
    # overall average. Named to avoid shadowing the builtin ``max``.
    avg = total_avg_note_intensity(x_vals, FFT_data)
    folded_note_sums = [
        np.sum([note_intensities[i + 12 * j] for j in range(8)])
        for i in range(12)
    ]
    method_2 = np.append(avg, np.append(folded_note_sums, BPM))

    return [method_1, method_2, method_3]
Esempio n. 3
0
def segmentClassification(data, model_name, model_type, sample_rate=250000):
    """Classify an in-memory audio segment with a stored model.

    Args:
        data: audio samples; passed through ``audioBasicIO.stereo2mono``.
        model_name: path of the model file; must exist on disk.
        model_type: ``'knn'`` selects ``load_model_knn``; any other value
            selects ``load_model``. Also forwarded to ``classifierWrapper``.
        sample_rate: sampling rate of ``data`` in Hz. Defaults to 250000,
            the value that used to be hard-coded here, so existing callers
            are unaffected.

    Returns:
        ``(Result, P, classNames)`` from ``classifierWrapper`` plus the
        model's class names, or ``(-1, -1, -1)`` when ``model_name`` is not
        a file.
    """
    if not os.path.isfile(model_name):
        print("segmentClassification: input model_name not found!")
        return (-1, -1, -1)

    # Only the selected loader name is evaluated.
    load = load_model_knn if model_type == 'knn' else load_model
    [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
     compute_beat] = load(model_name)

    Fs = sample_rate
    x = audioBasicIO.stereo2mono(data)

    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)
    curFV = (mt_features - MEAN) / STD  # normalization

    [Result, P] = classifierWrapper(classifier, model_type, curFV)
    return Result, P, classNames
Esempio n. 4
0
def beatExtractionWrapper(wav_file, plot):
    """Print the beat rate and ratio estimated for ``wav_file``.

    Short-term features are extracted with a 0.050 s window and a 0.050 s
    step and handed to ``aF.beatExtraction``; ``plot`` is forwarded to it
    unchanged. Nothing is returned.

    Raises:
        Exception: if ``wav_file`` is not an existing file.
    """
    if os.path.isfile(wav_file):
        sample_rate, signal = audioBasicIO.readAudioFile(wav_file)
    else:
        raise Exception("Input audio file not found!")

    frame_sec = 0.050
    frame_len = frame_sec * sample_rate  # window and step are the same size
    st_features, _ = aF.stFeatureExtraction(signal, sample_rate,
                                            frame_len, frame_len)
    tempo, confidence = aF.beatExtraction(st_features, frame_sec, plot)

    print("Beat: {0:d} bpm ".format(int(tempo)))
    print("Ratio: {0:.2f} ".format(confidence))
Esempio n. 5
0
def beatExtractionWrapper(wav_file, plot):
    """Estimate and print the beat (bpm) and ratio of an audio file.

    The file is read through ``audioBasicIO.readAudioFile``, short-term
    features are computed on 0.050 s frames with a 0.050 s step, and
    ``aF.beatExtraction`` is called with ``plot`` passed straight through.
    The function prints its two results and returns ``None``.

    Raises:
        Exception: when ``wav_file`` does not point to an existing file.
    """
    file_exists = os.path.isfile(wav_file)
    if not file_exists:
        raise Exception("Input audio file not found!")

    rate, samples = audioBasicIO.readAudioFile(wav_file)

    step_sec = 0.050
    extraction = aF.stFeatureExtraction(samples, rate,
                                        step_sec * rate, step_sec * rate)
    features, _ = extraction  # second element is not used here

    beat_rate, beat_ratio = aF.beatExtraction(features, step_sec, plot)
    print("Beat: {0:d} bpm ".format(int(beat_rate)))
    print("Ratio: {0:.2f} ".format(beat_ratio))
Esempio n. 6
0
def beatExtractionWrapper(wavFileName, plot):
    """Print the beat rate and ratio estimated for ``wavFileName``.

    In this variant the value returned by ``aF.stFeatureExtraction`` is
    passed to ``aF.beatExtraction`` as-is (it is not unpacked). Window and
    step are both 0.050 s; ``plot`` is forwarded unchanged. Returns ``None``.

    Raises:
        Exception: if ``wavFileName`` is not an existing file.
    """
    if not os.path.isfile(wavFileName):
        raise Exception("Input audio file not found!")

    sample_rate, samples = audioBasicIO.readAudioFile(wavFileName)

    hop_sec = 0.050
    hop_len = hop_sec * sample_rate
    st_feats = aF.stFeatureExtraction(samples, sample_rate, hop_len, hop_len)

    tempo, confidence_ratio = aF.beatExtraction(st_feats, hop_sec, plot)
    print("Beat: {0:d} bpm ".format(int(tempo)))
    print("Ratio: {0:.2f} ".format(confidence_ratio))
Esempio n. 7
0
def fileRegression(inputFile, modelName, modelType):
    """Run every regression model matching ``modelName + "_*"`` on one file.

    Model files are discovered with ``glob``; paths ending in ``"MEANS"``
    are skipped. The text after the last underscore of each remaining path
    is used as that regression's name.

    Args:
        inputFile: path of the audio file to analyse.
        modelName: common path prefix of the regression model files.
        modelType: ``'svm'`` / ``'svm_rbf'`` (loaded with ``loadSVModel``)
            or ``'randomforest'`` (loaded with ``loadRandomForestModel``).

    Returns:
        ``(R, regressionNames)`` where ``R`` holds one
        ``regressionWrapper`` result per model, in the same order as
        ``regressionNames``. ``(-1, -1, -1)`` is returned when the input
        file is missing, ``modelType`` is not supported, no model file
        matches, or a matched path is not a regular file. The unsupported
        type and "no model" cases used to crash with UnboundLocalError /
        IndexError.
    """
    failure = (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileRegression: wav file not found!")
        return failure

    if modelType not in ('svm', 'svm_rbf', 'randomforest'):
        print("fileRegression: unsupported modelType: {0}".format(modelType))
        return failure

    regressionModels = [path for path in glob.glob(modelName + "_*")
                        if not path.endswith("MEANS")]
    if not regressionModels:
        print("fileRegression: no regression models found for "
              "{0}".format(modelName))
        return failure
    regressionNames = [path[path.rfind("_") + 1:]
                       for path in regressionModels]

    if modelType == 'randomforest':
        loadRegressionModel = loadRandomForestModel
    else:
        loadRegressionModel = loadSVModel

    # FEATURE EXTRACTION
    # Only the first model is loaded here, to obtain the window settings.
    [_, _, _, mtWin, mtStep, stWin, stStep,
     computeBEAT] = loadRegressionModel(regressionModels[0], True)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    # long term averaging of mid-term statistics
    MidTermFeatures = MidTermFeatures.mean(axis=1)
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)

    # REGRESSION
    R = []
    for r in regressionModels:
        if not os.path.isfile(r):
            print("fileRegression: input modelName not found!")
            return failure
        # Each model carries its own normalization vectors; the window
        # settings stored with it are not needed again.
        [Model, MEAN, STD, _, _, _, _, _] = loadRegressionModel(r, True)
        curFV = (MidTermFeatures - MEAN) / STD  # normalization
        R.append(regressionWrapper(Model, modelType, curFV))
    return R, regressionNames
Esempio n. 8
0
def fileClassification(inputFile, model_name, model_type):
    """Classify one audio file with a stored model, printing progress.

    Args:
        inputFile: path of the audio file to classify.
        model_name: path of the model file.
        model_type: ``'knn'`` selects ``load_model_knn``; any other value
            selects ``load_model``. Also forwarded to ``classifierWrapper``.

    Returns:
        ``(Result, P, classNames)`` on success. ``(-1, -1, -1)`` when the
        model file or the audio file is missing, when the mono signal is an
        ``int`` (audio file IO problem), or when the recording is not longer
        than one mid-term window.
    """
    print("Loading Classifier")

    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    # Only the selected loader name is evaluated.
    load = load_model_knn if model_type == 'knn' else load_model
    [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
     compute_beat] = load(model_name)

    print("Printing Classnames")
    print(classNames)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)

    if isinstance(x, int):  # audio file IO problem
        # This message used to sit after the return and was never printed.
        print('io problem')
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mt_win:
        return (-1, -1, -1)

    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)
    curFV = (mt_features - MEAN) / STD  # normalization

    [Result, P] = classifierWrapper(classifier, model_type, curFV)
    return Result, P, classNames
Esempio n. 9
0
def bufferRegression(audioBuffer, sampleRate, model_name, model_type):
    """Run every regression model matching ``model_name + "_*"`` on a buffer.

    Model files are discovered with ``glob``; paths ending in ``"MEANS"``
    are skipped. The text after the last underscore of each remaining path
    is used as that regression's name.

    Args:
        audioBuffer: audio samples, passed to ``aF.mtFeatureExtraction``
            unchanged.
        sampleRate: sampling rate of ``audioBuffer``.
        model_name: common path prefix of the regression model files.
        model_type: ``'svm'``, ``'svm_rbf'`` or ``'randomforest'``; all are
            loaded with ``load_model``.

    Returns:
        ``(R, regression_names)`` where ``R`` holds one
        ``regressionWrapper`` result per model, in the same order as
        ``regression_names``. ``(-1, -1, -1)`` is returned when
        ``model_type`` is not supported, no model file matches, or a matched
        path is not a regular file. The first two cases used to crash with
        UnboundLocalError / IndexError.
    """
    failure = (-1, -1, -1)

    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        print("bufferRegression: unsupported model_type: "
              "{0}".format(model_type))
        return failure

    regression_models = [path for path in glob.glob(model_name + "_*")
                         if not path.endswith("MEANS")]
    if not regression_models:
        print("bufferRegression: no regression models found for "
              "{0}".format(model_name))
        return failure
    regression_names = [path[path.rfind("_") + 1:]
                        for path in regression_models]

    # FEATURE EXTRACTION
    # Only the first model is loaded here, to obtain the window settings.
    [_, _, _, mt_win, mt_step, st_win, st_step,
     compute_beat] = load_model(regression_models[0], True)

    Fs = sampleRate
    x = audioBuffer

    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)

    # REGRESSION
    R = []
    for r in regression_models:
        if not os.path.isfile(r):
            print("bufferRegression: input model_name not found!")
            return failure
        # Each model carries its own normalization vectors; the window
        # settings stored with it are not needed again.
        [model, MEAN, STD, _, _, _, _, _] = load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(regressionWrapper(model, model_type, curFV))
    return R, regression_names
Esempio n. 10
0
def bufferClassification(audioBuffer, sampleRate, model_name, model_type):
    """Classify an in-memory audio buffer with a stored model.

    ``model_type == 'knn'`` loads the model with ``load_model_knn``; every
    other value uses ``load_model``. The model is loaded before the buffer
    is validated.

    Returns ``(Result, P, classNames)`` on success, or ``(-1, -1, -1)``
    (after printing a message) when ``audioBuffer`` is an ``int`` or when
    the buffer is not longer than one mid-term window.
    """
    load = load_model_knn if model_type == 'knn' else load_model
    (classifier, feat_mean, feat_std, class_names, mt_win, mt_step, st_win,
     st_step, compute_beat) = load(model_name)

    failure = (-1, -1, -1)

    if isinstance(audioBuffer, int):  # audio buffer format problem
        print("bufferClassification: bad audio format!")
        return failure

    duration_sec = audioBuffer.shape[0] / float(sampleRate)
    if duration_sec <= mt_win:
        print(
            "bufferClassification: too little audio to analyze with medium term window",
            mt_win)
        return failure

    # Mid-term feature extraction over the whole buffer.
    mt_stats, st_feats, _ = aF.mtFeatureExtraction(
        audioBuffer, sampleRate,
        mt_win * sampleRate, mt_step * sampleRate,
        round(sampleRate * st_win), round(sampleRate * st_step))

    # Long-term average of the mid-term statistics.
    feature_vec = mt_stats.mean(axis=1)
    if compute_beat:
        bpm, bpm_conf = aF.beatExtraction(st_feats, st_step)
        feature_vec = numpy.append(feature_vec, bpm)
        feature_vec = numpy.append(feature_vec, bpm_conf)

    normalized = (feature_vec - feat_mean) / feat_std

    label, probabilities = classifierWrapper(classifier, model_type,
                                             normalized)
    return label, probabilities, class_names
Esempio n. 11
0
def getBPM():
    """Continuously record audio and publish a beat estimate (Python 2).

    Loops forever and never returns. On every iteration it triggers
    ``soundRecorder.soundRecord5Sec()``, reads ``"file.wav"``, extracts
    short-term features from the first channel, estimates the BPM with
    ``audioFeatureExtraction.beatExtraction`` and stores twice that value in
    the module-level global ``globalNewBPM``.

    Uses Python 2 ``print`` statements, so this block does not parse under
    Python 3.
    """
    while (1):
        print "LISTENING~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~SHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH"
        # presumably records to "file.wav", which is read next -- TODO confirm
        soundRecorder.soundRecord5Sec()
        [Fs, x] = audioBasicIO.readAudioFile("file.wav")
        x = x[:, 0]  # sometimes necessary due to different wav files...
        winSize = .1  # size of the window to extract data from
        winStep = winSize / 2  # 50% overlap steps
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, winSize * Fs,
                                                       winStep * Fs)
        # A second recording is started before the beat is estimated from
        # the features computed above.
        soundRecorder.soundRecord5Sec()
        #plt.subplot(2,1,1); plt.plot(F[0,:]); plt.xlabel('Frame no'); plt.ylabel('ZCR');
        #plt.subplot(2,1,2); plt.plot(F[1,:]); plt.xlabel('Frame no'); plt.ylabel('Energy'); plt.show()

        # NOTE(review): winSize (not winStep) is passed as the second
        # argument here; that may be the reason for the factor-2 correction
        # below -- confirm against beatExtraction's expected argument.
        [BPM, RATIO] = audioFeatureExtraction.beatExtraction(F,
                                                             winSize,
                                                             PLOT=False)

        #BPM is lower by a factor of 2 from the actual bpm. So we will multiply by 2.
        BPM = BPM * 2
        # Publish the estimate through a module-level global.
        global globalNewBPM
        globalNewBPM = BPM
        # Trailing comma: Python 2 print without a newline. The BPM value
        # itself is not printed by the code visible here.
        print "I heard this BPM, master!",
Esempio n. 12
0
def dirWavFeatureExtraction(dirName,
                            mt_win,
                            mt_step,
                            st_win,
                            st_step,
                            feats,
                            compute_beat=False):
    """
    This function extracts the mid-term features of the WAVE files of a particular folder.

    The resulting feature vector is extracted by long-term averaging the mid-term features.
    Therefore ONE FEATURE VECTOR is extracted for each WAV file.

    ARGUMENTS:
        - dirName:        the path of the WAVE directory
        - mt_win, mt_step:    mid-term window and step (in seconds)
        - st_win, st_step:    short-term window and step (in seconds)
        - feats:          forwarded unchanged to mtFeatureExtraction
        - compute_beat:   when True, the two values returned by
                          beatExtraction are appended to each file's vector

    RETURNS:
        (all_mt_feats, wav_file_list2, mt_feature_names)
        - all_mt_feats:     empty array when no file contributed, a single
                            vector when exactly one did, otherwise one row
                            per contributing file (numpy.vstack)
        - wav_file_list2:   paths of the files that were analyzed
        - mt_feature_names: names returned by the last mtFeatureExtraction
                            call ([] when no file was analyzed)

    Files that are empty, unreadable (readAudioFile yields an int) or shorter
    than a fifth of a second are skipped. Files whose averaged features
    contain NaN or inf are analyzed but not added to all_mt_feats.
    """
    # time.clock() was removed in Python 3.8. perf_counter() is the documented
    # replacement; fall back to time.time() on interpreters that lack it.
    timer = getattr(time, "perf_counter", time.time)

    all_mt_feats = numpy.array([])
    process_times = []

    types = ('*.wav', '*.aif', '*.aiff', '*.mp3', '*.au', '*.ogg')
    wav_file_list = []
    for pattern in types:
        wav_file_list.extend(glob.glob(os.path.join(dirName, pattern)))

    wav_file_list = sorted(wav_file_list)
    wav_file_list2, mt_feature_names = [], []
    for i, wavFile in enumerate(wav_file_list):
        print("Analyzing file {0:d} of "
              "{1:d}: {2:s}".format(i + 1, len(wav_file_list), wavFile))
        if os.stat(wavFile).st_size == 0:
            print("   (EMPTY FILE -- SKIPPING)")
            continue
        [fs, x] = audioBasicIO.readAudioFile(wavFile)
        if isinstance(x, int):
            continue

        t1 = timer()
        x = audioBasicIO.stereo2mono(x)
        if x.shape[0] < float(fs) / 5:
            print("  (AUDIO FILE TOO SMALL - SKIPPING)")
            continue
        wav_file_list2.append(wavFile)

        # The extraction call is the same with or without beat features;
        # only the use of the short-term features differs.
        [mt_term_feats, st_features, mt_feature_names] = \
            mtFeatureExtraction(x, fs, round(mt_win * fs),
                                round(mt_step * fs),
                                round(fs * st_win), round(fs * st_step), feats)
        if compute_beat:
            [beat, beat_conf] = beatExtraction(st_features, st_step)

        # long term averaging of mid-term statistics
        mt_term_feats = numpy.transpose(mt_term_feats)
        mt_term_feats = mt_term_feats.mean(axis=0)
        if (not numpy.isnan(mt_term_feats).any()) and \
                (not numpy.isinf(mt_term_feats).any()):
            if compute_beat:
                mt_term_feats = numpy.append(mt_term_feats, beat)
                mt_term_feats = numpy.append(mt_term_feats, beat_conf)
            if len(all_mt_feats) == 0:
                # first feature vector
                all_mt_feats = mt_term_feats
            else:
                all_mt_feats = numpy.vstack((all_mt_feats, mt_term_feats))
            t2 = timer()
            duration = float(len(x)) / fs
            process_times.append((t2 - t1) / duration)
    if len(process_times) > 0:
        print("Feature extraction complexity ratio: "
              "{0:.1f} x realtime".format(
                  (1.0 / numpy.mean(numpy.array(process_times)))))
    return (all_mt_feats, wav_file_list2, mt_feature_names)
Esempio n. 13
0
def get_beat(wav_data, rate):
    """Return the BPM that ``beatExtraction`` reports for ``wav_data``.

    Short-term features are computed with a window and step of
    ``0.050 * rate`` samples; the second value returned by
    ``beatExtraction`` is discarded.
    """
    frame_len = 0.050 * rate  # window and step share the same length
    st_features = audioFeatureExtraction.stFeatureExtraction(
        wav_data, rate, frame_len, frame_len)
    tempo, _unused = audioFeatureExtraction.beatExtraction(st_features, 0.050)
    return tempo
Esempio n. 14
0
def emotion_from_speech(Fs, x, log, model_name="pyAudioAnalysis/pyAudioAnalysis/data/svmSpeechEmotion", model_type="svm"):
    """Estimate valence and arousal for a raw 16-bit audio buffer.

    Regression model files are discovered as ``model_name + "_*"`` (paths
    ending in ``"MEANS"`` are skipped); the text after the last underscore
    of each path is that model's name.

    :param Fs: frame rate
    :param x: raw audio data, decoded as ``int16`` samples
    :param log: logger; ``log.warning`` is called when a value is clamped
    :param model_name: common path prefix of the regression model files
    :param model_type: ``'svm'``, ``'svm_rbf'`` or ``'randomforest'``
    :return: ``{"valence": ..., "arousal": ...}`` with both values clamped
        to ``[-1, 1]``. Both entries stay ``None`` when ``model_type`` is
        unsupported, no model file is found, a matched path is not a file,
        or fewer than two predictions are available.
    """
    emotion = {"valence": None, "arousal": None}

    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        return emotion

    regression_models = [path for path in glob.glob(model_name + "_*")
                         if not path.endswith("MEANS")]
    if not regression_models:
        # Indexing the empty list below used to raise IndexError.
        print("emotion_from_speech: no regression models found for "
              "{0}".format(model_name))
        return emotion
    regression_names = [path[path.rfind("_") + 1:]
                        for path in regression_models]

    # Feature extraction
    # np.fromstring's binary mode is deprecated; frombuffer is the documented
    # replacement. The copy keeps the array writable, as fromstring's was.
    x = np.frombuffer(x, np.int16).copy()

    # Only the first model is loaded here, to obtain the window settings.
    [_, _, _, mt_win, mt_step, st_win, st_step,
     compute_beat] = aT.load_model(regression_models[0], True)

    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = np.append(mt_features, beat)
        mt_features = np.append(mt_features, beatConf)

    # Regression
    predictions = []
    for r in regression_models:
        if not os.path.isfile(r):
            print("emotion_from_speech: input model_name not found!")
            return emotion
        [model, MEAN, STD, _, _, _, _, _] = aT.load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        predictions.append(aT.regressionWrapper(model, model_type, curFV))

    # glob does not guarantee any ordering, so pick the predictions by model
    # name when the files are named "..._valence" / "..._arousal". Fall back
    # to the original positional order (first = valence, second = arousal)
    # for model sets that use other suffixes.
    by_name = dict(zip(regression_names, predictions))
    if "valence" in by_name and "arousal" in by_name:
        valence, arousal = by_name["valence"], by_name["arousal"]
    elif len(predictions) >= 2:
        valence, arousal = predictions[0], predictions[1]
    else:
        print("emotion_from_speech: expected two regression models, found "
              "{0}".format(len(predictions)))
        return emotion

    def clamp(value, label):
        # Limit value to [-1, 1], warning through ``log`` when it is cut.
        if value > 1:
            log.warning(label + " > 1")
            return 1
        if value < -1:
            log.warning(label + " < -1")
            return -1
        return value

    emotion["valence"] = clamp(valence, "Valence")
    emotion["arousal"] = clamp(arousal, "Arousal")

    return emotion
Esempio n. 15
0
def fileClassification(inputFile, modelName, modelType):
    """Classify one audio file with a stored model.

    Args:
        inputFile: path of the audio file to classify.
        modelName: path of the model file.
        modelType: one of ``'svm'``, ``'svm_rbf'``, ``'knn'``,
            ``'randomforest'``, ``'gradientboosting'`` or ``'extratrees'``;
            selects the loader and is forwarded to ``classifierWrapper``.

    Returns:
        ``(Result, P, classNames)`` on success. ``(-1, -1, -1)`` when the
        model file or the audio file is missing, when ``modelType`` is not
        one of the supported values (this used to crash with
        UnboundLocalError), when the mono signal is an ``int`` (audio file
        IO problem), or when the recording is not longer than one mid-term
        window.
    """
    failure = (-1, -1, -1)

    if not os.path.isfile(modelName):
        print("fileClassification: input modelName not found!")
        return failure

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return failure

    # Pick the loader for this model type. All loaders return the same
    # nine-element bundle, so the unpacking is done once below.
    if modelType == 'svm' or modelType == 'svm_rbf':
        loadClassifier = loadSVModel
    elif modelType == 'knn':
        loadClassifier = loadKNNModel
    elif modelType == 'randomforest':
        loadClassifier = loadRandomForestModel
    elif modelType == 'gradientboosting':
        loadClassifier = loadGradientBoostingModel
    elif modelType == 'extratrees':
        loadClassifier = loadExtraTreesModel
    else:
        print("fileClassification: unsupported modelType: "
              "{0}".format(modelType))
        return failure

    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
     computeBEAT] = loadClassifier(modelName)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)

    if isinstance(x, int):  # audio file IO problem
        return failure
    if x.shape[0] / float(Fs) <= mtWin:
        return failure

    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    # long term averaging of mid-term statistics
    MidTermFeatures = MidTermFeatures.mean(axis=1)
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    curFV = (MidTermFeatures - MEAN) / STD  # normalization

    [Result, P] = classifierWrapper(Classifier, modelType, curFV)
    return Result, P, classNames