def classifyNN(inputFile, modelName):
    """Classify one audio file with the model stored under ``modelName``.

    The window settings stored with the model drive mid-term feature
    extraction. The mid-term statistics are averaged over the recording,
    optionally extended with the beat estimate, normalised with the model's
    MEAN / STD and passed to ``classify``.

    Returns:
        ``(Result, P, classNames)`` on success, or ``(-1, -1, -1)`` when
        ``stereo2mono`` returned an int (presumably its failure marker) or
        the recording is not longer than one mid-term window.
    """
    failure = (-1, -1, -1)
    (Classifier, MEAN, STD, classNames,
     mtWin, mtStep, stWin, stStep, computeBEAT) = loadModel(modelName)

    Fs, signal = audioBasicIO.readAudioFile(inputFile)
    signal = audioBasicIO.stereo2mono(signal)
    if isinstance(signal, int):
        return failure

    duration_sec = signal.shape[0] / float(Fs)
    if duration_sec <= mtWin:
        return failure

    mid_term, short_term = aF.mtFeatureExtraction(
        signal, Fs,
        mtWin * Fs, mtStep * Fs,
        round(Fs * stWin), round(Fs * stStep))

    # Collapse the per-window statistics into one long-term vector.
    feature_vector = mid_term.mean(axis=1)
    if computeBEAT:
        beat, beat_conf = aF.beatExtraction(short_term, stStep)
        feature_vector = numpy.append(feature_vector, beat)
        feature_vector = numpy.append(feature_vector, beat_conf)

    # Normalise with the statistics stored alongside the model.
    normalised = (feature_vector - MEAN) / STD
    Result, P = classify(Classifier, normalised)
    return Result, P, classNames
def feature_combined(filename, smooth_window_size=0):
    """Build three alternative feature vectors for one WAV file.

    Args:
        filename: path handed to ``wavfile.read``.
        smooth_window_size: when non-zero, the FFT data is passed through
            ``run_avg`` with this window before the note intensities are
            measured.

    Returns:
        ``[method_1, method_2, method_3]`` where

        * ``method_1`` is the mean over axis 0 of ``external_feature_vects``,
        * ``method_2`` is the total average note intensity, followed by 12
          folded sums of the note intensities, followed by the BPM,
        * ``method_3`` is the 97 note intensities followed by the BPM.
    """
    rate, wav_data = wavfile.read(filename)

    external_vect = external_feature_vects(wav_data, rate)
    # NOTE(review): BPM is reassigned from get_beat() further down, so this
    # estimate appears to be unused - confirm before removing the call.
    BPM, _ = audioFeatureExtraction.beatExtraction(external_vect, 0.050)
    method_1 = np.mean(external_vect, axis=0)

    FFT_data = get_FFT_data(wav_data, rate)
    if smooth_window_size != 0:
        FFT_data = run_avg(FFT_data, smooth_window_size)
    BPM = get_beat(wav_data, rate)

    x_vals = FFT_indices_to_hz(FFT_data, rate)
    note_intensities = [
        max_note_intensity(x_vals, FFT_data, i) for i in range(97)
    ]
    method_3 = np.append(note_intensities, BPM)

    avg = total_avg_note_intensity(x_vals, FFT_data)
    # For each of the 12 offsets, add up the intensity found at that offset
    # in 8 consecutive groups of 12 notes. Named ``folded_sums`` (not
    # ``max``) so the builtin is no longer shadowed.
    folded_sums = [
        np.sum([note_intensities[i + 12 * j] for j in range(8)])
        for i in range(12)
    ]
    method_2 = np.append(avg, np.append(folded_sums, BPM))

    return [method_1, method_2, method_3]
def segmentClassification(data, model_name, model_type, Fs=250000):
    """Classify an in-memory audio segment with a stored model.

    Args:
        data: raw samples; converted with ``audioBasicIO.stereo2mono``.
        model_name: path of the model file; must exist on disk.
        model_type: ``'knn'`` selects ``load_model_knn``; any other value
            selects ``load_model``.
        Fs: sampling rate used to size the analysis windows. Defaults to
            250000, the value that used to be hard-coded here.

    Returns:
        ``(Result, P, classNames)`` on success, or ``(-1, -1, -1)`` when the
        model file is missing or ``stereo2mono`` returned an int instead of
        a sample array.
    """
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)

    if model_type == 'knn':
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model_knn(model_name)
    else:
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model(model_name)

    x = audioBasicIO.stereo2mono(data)
    if isinstance(x, int):
        # Same guard as the file/buffer classifiers: an int here cannot be
        # fed to the feature extractor.
        return (-1, -1, -1)

    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)

    curFV = (mt_features - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(classifier, model_type, curFV)
    return Result, P, classNames
def beatExtractionWrapper(wav_file, plot):
    """Estimate the tempo of ``wav_file`` and print it.

    Short-term features are extracted with a 50 ms window and a 50 ms step
    and handed to ``aF.beatExtraction`` together with the ``plot`` flag.
    The BPM (truncated to an int) and the ratio are printed.

    Raises:
        Exception: if ``wav_file`` does not exist.
    """
    file_exists = os.path.isfile(wav_file)
    if not file_exists:
        raise Exception("Input audio file not found!")

    window_sec = 0.050
    sampling_rate, signal = audioBasicIO.readAudioFile(wav_file)
    frame_len = window_sec * sampling_rate

    # Only the feature matrix is needed; the second return value is ignored.
    st_features, _ = aF.stFeatureExtraction(signal, sampling_rate,
                                            frame_len, frame_len)
    tempo, confidence_ratio = aF.beatExtraction(st_features, window_sec, plot)

    print("Beat: {0:d} bpm ".format(int(tempo)))
    print("Ratio: {0:.2f} ".format(confidence_ratio))
def beatExtractionWrapper(wavFileName, plot):
    """Print the estimated tempo of the audio file ``wavFileName``.

    This variant takes the return value of ``aF.stFeatureExtraction`` as the
    feature matrix directly (no tuple unpacking). It uses a 50 ms window
    with a 50 ms step and forwards ``plot`` to ``aF.beatExtraction``.

    Raises:
        Exception: if ``wavFileName`` does not exist.
    """
    if os.path.isfile(wavFileName) is False:
        raise Exception("Input audio file not found!")

    step_sec = 0.050
    rate, samples = audioBasicIO.readAudioFile(wavFileName)
    samples_per_frame = step_sec * rate

    short_term = aF.stFeatureExtraction(samples, rate,
                                        samples_per_frame, samples_per_frame)
    beat_result = aF.beatExtraction(short_term, step_sec, plot)
    bpm_value, ratio_value = beat_result

    print("Beat: {0:d} bpm ".format(int(bpm_value)))
    print("Ratio: {0:.2f} ".format(ratio_value))
def fileRegression(inputFile, modelName, modelType):
    """Run every regression model matching ``modelName + "_*"`` on one file.

    Model files are discovered with ``glob``; files whose name ends in
    ``"MEANS"`` are skipped. The text after the last underscore of each
    model path is reported as that model's name.

    Args:
        inputFile: path of the audio file to analyse.
        modelName: common path prefix of the regression model files.
        modelType: ``'svm'`` / ``'svm_rbf'`` (loaded with ``loadSVModel``)
            or ``'randomforest'`` (loaded with ``loadRandomForestModel``).

    Returns:
        ``(R, regressionNames)`` where ``R[i]`` is the output of
        ``regressionWrapper`` for the model named ``regressionNames[i]``.
        ``(-1, -1, -1)`` is returned when the audio file is missing, when no
        model file matches the prefix, or when a model file is not a regular
        file.

    Raises:
        ValueError: if ``modelType`` is not one of the supported values.
    """
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    regressionModels = [path for path in glob.glob(modelName + "_*")
                        if path[-5:] != "MEANS"]
    regressionNames = [path[path.rfind("_") + 1:]
                       for path in regressionModels]
    if not regressionModels:
        # Previously this case crashed with IndexError on regressionModels[0].
        print("fileClassification: input modelName not found!")
        return (-1, -1, -1)

    if modelType == 'svm' or modelType == "svm_rbf":
        loadRegressor = loadSVModel
    elif modelType == 'randomforest':
        loadRegressor = loadRandomForestModel
    else:
        # Previously an unknown type surfaced later as an unbound variable.
        raise ValueError(
            "fileRegression: unsupported modelType {0!r}".format(modelType))

    # Only the window settings of the first model are needed to extract
    # the features.
    [_, _, _, mtWin, mtStep, stWin, stStep,
     computeBEAT] = loadRegressor(regressionModels[0], True)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)

    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    # long term averaging of mid-term statistics
    MidTermFeatures = MidTermFeatures.mean(axis=1)
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)

    R = []
    for r in regressionModels:
        if not os.path.isfile(r):
            print("fileClassification: input modelName not found!")
            return (-1, -1, -1)
        # Each model carries its own normalisation statistics.
        [Model, MEAN, STD, _, _, _, _, _] = loadRegressor(r, True)
        curFV = (MidTermFeatures - MEAN) / STD  # normalization
        R.append(regressionWrapper(Model, modelType, curFV))
    return R, regressionNames
def fileClassification(inputFile, model_name, model_type):
    """Classify one audio file with a stored model.

    Args:
        inputFile: path of the audio file.
        model_name: path of the model file.
        model_type: ``'knn'`` selects ``load_model_knn``; any other value
            selects ``load_model``.

    Returns:
        ``(Result, P, classNames)`` on success. ``(-1, -1, -1)`` when the
        model or the audio file is missing, when the audio could not be read
        (``stereo2mono`` returned an int), or when the recording is not
        longer than one mid-term window.
    """
    print("Loading Classifier")
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    if model_type == 'knn':
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model_knn(model_name)
    else:
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model(model_name)
    print("Printing Classnames")
    print(classNames)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)

    if isinstance(x, int):  # audio file IO problem
        # The message must be emitted before returning; it used to sit
        # after the return statement.
        print('io problem')
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mt_win:
        return (-1, -1, -1)

    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)

    curFV = (mt_features - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(classifier, model_type, curFV)
    return Result, P, classNames
def bufferRegression(audioBuffer, sampleRate, model_name, model_type):
    """Run every regression model matching ``model_name + "_*"`` on a buffer.

    Model files are discovered with ``glob``; files whose name ends in
    ``"MEANS"`` are skipped. The text after the last underscore of each
    model path is reported as that model's name.

    Args:
        audioBuffer: samples passed unchanged to the feature extractor.
        sampleRate: sampling rate used to size the analysis windows.
        model_name: common path prefix of the regression model files.
        model_type: ``'svm'``, ``'svm_rbf'`` or ``'randomforest'``.

    Returns:
        ``(R, regression_names)`` where ``R[i]`` is the output of
        ``regressionWrapper`` for the model named ``regression_names[i]``.
        ``(-1, -1, -1)`` is returned when no model file matches the prefix
        or a model path is not a regular file.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    regression_models = [path for path in glob.glob(model_name + "_*")
                         if path[-5:] != "MEANS"]
    regression_names = [path[path.rfind("_") + 1:]
                        for path in regression_models]
    if not regression_models:
        # Previously this case crashed with IndexError on the [0] lookup.
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)

    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        # Previously an unknown type surfaced later as an unbound variable.
        raise ValueError(
            "bufferRegression: unsupported model_type {0!r}".format(
                model_type))

    # Only the window settings of the first model are needed to extract
    # the features.
    [_, _, _, mt_win, mt_step, st_win, st_step,
     compute_beat] = load_model(regression_models[0], True)

    Fs = sampleRate
    x = audioBuffer

    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)

    R = []
    for r in regression_models:
        if not os.path.isfile(r):
            print("fileClassification: input model_name not found!")
            return (-1, -1, -1)
        # Each model carries its own normalisation statistics.
        [model, MEAN, STD, _, _, _, _, _] = load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(regressionWrapper(model, model_type, curFV))
    return R, regression_names
def bufferClassification(audioBuffer, sampleRate, model_name, model_type):
    """Classify an in-memory audio buffer with a stored model.

    ``'knn'`` models are loaded with ``load_model_knn``; every other type is
    loaded with ``load_model``. The model is loaded before the buffer is
    validated.

    Returns:
        ``(Result, P, classNames)`` on success, or ``(-1, -1, -1)`` when the
        buffer is an int or is not longer than one mid-term window.
    """
    failure = (-1, -1, -1)

    loader = load_model_knn if model_type == 'knn' else load_model
    (classifier, MEAN, STD, classNames,
     mt_win, mt_step, st_win, st_step, compute_beat) = loader(model_name)

    if isinstance(audioBuffer, int):  # audio buffer format problem
        print("bufferClassification: bad audio format!")
        return failure

    buffer_seconds = audioBuffer.shape[0] / float(sampleRate)
    if buffer_seconds <= mt_win:
        print(
            "bufferClassification: too little audio to analyze with medium term window",
            mt_win)
        return failure

    mid_term, short_term, _ = aF.mtFeatureExtraction(
        audioBuffer, sampleRate,
        mt_win * sampleRate, mt_step * sampleRate,
        round(sampleRate * st_win), round(sampleRate * st_step))

    # Average the mid-term statistics over the whole buffer.
    feature_vector = mid_term.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beatExtraction(short_term, st_step)
        feature_vector = numpy.append(feature_vector, beat)
        feature_vector = numpy.append(feature_vector, beat_conf)

    normalised = (feature_vector - MEAN) / STD
    Result, P = classifierWrapper(classifier, model_type, normalised)
    return Result, P, classNames
def getBPM():
    """Record audio in an endless loop and publish the estimated tempo.

    Each iteration records through ``soundRecorder.soundRecord5Sec``, reads
    ``file.wav``, extracts short-term features with a 0.1 s window and 50 %
    overlap, estimates the BPM and stores twice that value in the
    module-level ``globalNewBPM``. The function never returns.
    """
    global globalNewBPM

    while True:
        print("LISTENING~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~SHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH")
        soundRecorder.soundRecord5Sec()
        [Fs, x] = audioBasicIO.readAudioFile("file.wav")
        x = x[:, 0]  # sometimes necessary due to different wav files...

        winSize = .1           # size of the window to extract data from
        winStep = winSize / 2  # 50% overlap steps
        F = audioFeatureExtraction.stFeatureExtraction(
            x, Fs, winSize * Fs, winStep * Fs)
        # NOTE(review): a second recording is started before the first one
        # is analysed - confirm this is intentional.
        soundRecorder.soundRecord5Sec()

        [BPM, RATIO] = audioFeatureExtraction.beatExtraction(
            F, winSize, PLOT=False)
        # BPM is lower by a factor of 2 from the actual bpm, so multiply
        # by 2.
        BPM = BPM * 2
        globalNewBPM = BPM

        # Python 3 spelling of the former ``print "...",`` statement (no
        # trailing newline).
        # NOTE(review): the message never shows the BPM value itself -
        # confirm whether it was meant to be printed here.
        print("I heard this BPM, master!", end=" ")
def dirWavFeatureExtraction(dirName, mt_win, mt_step, st_win, st_step, feats,
                            compute_beat=False):
    """
    This function extracts the mid-term features of the WAVE files of a
    particular folder.

    The resulting feature vector is extracted by long-term averaging the
    mid-term features. Therefore ONE FEATURE VECTOR is extracted for each
    WAV file.

    ARGUMENTS:
        - dirName:          the path of the WAVE directory
        - mt_win, mt_step:  mid-term window and step (in seconds)
        - st_win, st_step:  short-term window and step (in seconds)
        - feats:            forwarded unchanged to mtFeatureExtraction
        - compute_beat:     when True, the beat and its confidence are
                            appended to each file's feature vector

    RETURNS:
        (all_mt_feats, wav_file_list2, mt_feature_names): the stacked
        feature vectors, the files those vectors were computed from (in the
        same order), and the feature names reported by mtFeatureExtraction.
        Files that are empty, unreadable, too short, or whose features
        contain NaN/Inf are skipped.
    """
    # time.clock() no longer exists on Python >= 3.8; fall back to
    # time.time() on interpreters without perf_counter.
    timer = getattr(time, "perf_counter", time.time)

    all_mt_feats = numpy.array([])
    process_times = []

    types = ('*.wav', '*.aif', '*.aiff', '*.mp3', '*.au', '*.ogg')
    wav_file_list = []
    for pattern in types:
        wav_file_list.extend(glob.glob(os.path.join(dirName, pattern)))
    wav_file_list = sorted(wav_file_list)

    wav_file_list2, mt_feature_names = [], []
    for i, wavFile in enumerate(wav_file_list):
        print("Analyzing file {0:d} of "
              "{1:d}: {2:s}".format(i + 1, len(wav_file_list), wavFile))
        if os.stat(wavFile).st_size == 0:
            print(" (EMPTY FILE -- SKIPPING)")
            continue
        [fs, x] = audioBasicIO.readAudioFile(wavFile)
        if isinstance(x, int):
            continue

        t1 = timer()
        x = audioBasicIO.stereo2mono(x)
        if x.shape[0] < float(fs) / 5:
            print(" (AUDIO FILE TOO SMALL - SKIPPING)")
            continue

        [mt_term_feats, st_features, mt_feature_names] = \
            mtFeatureExtraction(x, fs, round(mt_win * fs),
                                round(mt_step * fs),
                                round(fs * st_win), round(fs * st_step),
                                feats)
        if compute_beat:
            [beat, beat_conf] = beatExtraction(st_features, st_step)

        # long term averaging of mid-term statistics
        mt_term_feats = numpy.transpose(mt_term_feats).mean(axis=0)
        if numpy.isnan(mt_term_feats).any() or \
                numpy.isinf(mt_term_feats).any():
            continue

        # Record the file only once its vector is known to be usable, so
        # that the returned file list stays aligned with the rows of the
        # feature matrix.
        wav_file_list2.append(wavFile)
        if compute_beat:
            mt_term_feats = numpy.append(mt_term_feats, beat)
            mt_term_feats = numpy.append(mt_term_feats, beat_conf)
        if len(all_mt_feats) == 0:
            # first feature vector
            all_mt_feats = mt_term_feats
        else:
            all_mt_feats = numpy.vstack((all_mt_feats, mt_term_feats))

        t2 = timer()
        duration = float(len(x)) / fs
        process_times.append((t2 - t1) / duration)

    if len(process_times) > 0:
        print("Feature extraction complexity ratio: "
              "{0:.1f} x realtime".format(
                  (1.0 / numpy.mean(numpy.array(process_times)))))
    return (all_mt_feats, wav_file_list2, mt_feature_names)
def get_beat(wav_data, rate):
    """Return the BPM estimated for ``wav_data`` sampled at ``rate``.

    Short-term features are computed with a 50 ms window and a 50 ms step;
    only the first value returned by ``beatExtraction`` is kept.
    """
    window_sec = 0.050
    samples_per_window = window_sec * rate
    st_features = audioFeatureExtraction.stFeatureExtraction(
        wav_data, rate, samples_per_window, samples_per_window)
    tempo, _ = audioFeatureExtraction.beatExtraction(st_features, window_sec)
    return tempo
def _clamp_emotion_score(score, label, log):
    """Clamp ``score`` to [-1, 1], warning through ``log`` when it was outside."""
    if score > 1:
        log.warning("{0} > 1".format(label))
        return 1
    if score < -1:
        log.warning("{0} < -1".format(label))
        return -1
    return score


def emotion_from_speech(Fs, x, log, model_name="pyAudioAnalysis/pyAudioAnalysis/data/svmSpeechEmotion", model_type="svm"):
    """Estimate valence and arousal for a chunk of raw 16-bit audio.

    :param Fs: frame rate
    :param x: data; a bytes-like buffer interpreted as int16 samples
    :param log: logger; ``log.warning`` is called when a score is clamped
    :param model_name: common path prefix of the regression model files
        (``model_name + "_*"``, files ending in ``"MEANS"`` are skipped)
    :param model_type: ``'svm'``, ``'svm_rbf'`` or ``'randomforest'``
    :return: ``{"valence": ..., "arousal": ...}`` with each score clamped to
        [-1, 1]. Both values stay ``None`` when no model file is found, the
        model type is unsupported, or a model path is not a regular file.
    """
    emotion = {"valence": None, "arousal": None}

    regression_models = [path for path in glob.glob(model_name + "_*")
                         if path[-5:] != "MEANS"]
    regression_names = [path[path.rfind("_") + 1:]
                        for path in regression_models]
    if not regression_models:
        # Previously this case crashed with IndexError on the [0] lookup.
        return emotion
    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        return emotion

    # Feature extraction.
    # frombuffer replaces the deprecated binary mode of fromstring; the copy
    # keeps the array writable, as the fromstring result was.
    x = np.frombuffer(x, np.int16).copy()

    # Only the window settings of the first model are needed here.
    [_, _, _, mt_win, mt_step, st_win, st_step,
     compute_beat] = aT.load_model(regression_models[0], True)

    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs,
                                                 mt_win * Fs, mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = np.append(mt_features, beat)
        mt_features = np.append(mt_features, beatConf)

    # Regression
    R = []
    for r in regression_models:
        if not os.path.isfile(r):
            print("fileClassification: input model_name not found!")
            return emotion
        [model, MEAN, STD, _, _, _, _, _] = aT.load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(aT.regressionWrapper(model, model_type, curFV))

    # glob gives no ordering guarantee, so pair each prediction with the
    # name taken from its model file instead of trusting list positions.
    by_name = dict(zip(regression_names, R))
    if "valence" in by_name and "arousal" in by_name:
        scores = by_name
    else:
        # Unfamiliar file suffixes: keep the historical positional
        # convention (first model = valence, second = arousal).
        scores = dict(zip(("valence", "arousal"), R))

    for key, label in (("valence", "Valence"), ("arousal", "Arousal")):
        if key in scores:
            emotion[key] = _clamp_emotion_score(scores[key], label, log)
    return emotion
def fileClassification(inputFile, modelName, modelType):
    """Classify one audio file with a stored model of the given type.

    Args:
        inputFile: path of the audio file.
        modelName: path of the model file.
        modelType: one of ``'svm'``, ``'svm_rbf'``, ``'knn'``,
            ``'randomforest'``, ``'gradientboosting'`` or ``'extratrees'``.

    Returns:
        ``(Result, P, classNames)`` on success. ``(-1, -1, -1)`` when the
        model or the audio file is missing, when the audio could not be read
        (``stereo2mono`` returned an int), or when the recording is not
        longer than one mid-term window.

    Raises:
        ValueError: if ``modelType`` is not one of the supported values.
    """
    if not os.path.isfile(modelName):
        print("fileClassification: input modelName not found!")
        return (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    # Lambdas keep the lookup of each loader lazy: only the selected one is
    # resolved and called.
    loaders = {
        'svm': lambda: loadSVModel(modelName),
        'svm_rbf': lambda: loadSVModel(modelName),
        'knn': lambda: loadKNNModel(modelName),
        'randomforest': lambda: loadRandomForestModel(modelName),
        'gradientboosting': lambda: loadGradientBoostingModel(modelName),
        'extratrees': lambda: loadExtraTreesModel(modelName),
    }
    loadSelected = loaders.get(modelType)
    if loadSelected is None:
        # Previously an unknown type fell through every branch and failed
        # later on an unbound variable.
        raise ValueError(
            "fileClassification: unsupported modelType {0!r}".format(
                modelType))
    [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
     computeBEAT] = loadSelected()

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)

    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mtWin:
        return (-1, -1, -1)

    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs,
                                                  mtWin * Fs, mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    # long term averaging of mid-term statistics
    MidTermFeatures = MidTermFeatures.mean(axis=1)
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)

    curFV = (MidTermFeatures - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(Classifier, modelType, curFV)
    return Result, P, classNames