def fileChromagramWrapper(wav_file):
    """
    Wrapper that reads an audio file, converts it to mono and runs
    aF.stChromagram() on it.

    ARGUMENTS:
     - wav_file:    path of the audio file to analyze

    Raises an Exception if wav_file is not an existing file. Nothing is
    returned; the outputs of aF.stChromagram() are discarded.
    """
    file_exists = os.path.isfile(wav_file)
    if not file_exists:
        raise Exception("Input audio file not found!")

    sampling_rate, signal = audioBasicIO.readAudioFile(wav_file)
    mono_signal = audioBasicIO.stereo2mono(signal)

    # Window and step are both 40 msec, expressed in samples.
    window = round(sampling_rate * 0.040)
    step = round(sampling_rate * 0.040)
    # NOTE(review): the trailing True is presumably the "plot" flag of
    # stChromagram -- confirm against its signature.
    _chroma, _time_axis, _freq_axis = aF.stChromagram(
        mono_signal, sampling_rate, window, step, True)
def beatExtractionWrapper(wav_file, plot):
    """
    Wrapper that estimates the beat rate of an audio file and prints it.

    ARGUMENTS:
     - wav_file:    path of the audio file to analyze
     - plot:        forwarded unchanged to aF.beatExtraction()

    Raises an Exception if wav_file is not an existing file. The two values
    returned by aF.beatExtraction() are printed, nothing is returned.
    """
    file_exists = os.path.isfile(wav_file)
    if not file_exists:
        raise Exception("Input audio file not found!")

    sampling_rate, signal = audioBasicIO.readAudioFile(wav_file)

    # Short-term window and step are both 50 msec (expressed in samples).
    st_features, _ = aF.stFeatureExtraction(signal, sampling_rate,
                                            0.050 * sampling_rate,
                                            0.050 * sampling_rate)
    beat_rate, beat_ratio = aF.beatExtraction(st_features, 0.050, plot)

    bpm_message = "Beat: {0:d} bpm ".format(int(beat_rate))
    ratio_message = "Ratio: {0:.2f} ".format(beat_ratio)
    print(bpm_message)
    print(ratio_message)
def silenceRemovalWrapper(inputFile, smoothingWindow, weight):
    """
    Wrapper that detects the non-silent segments of an audio file and writes
    each of them to its own WAV file.

    ARGUMENTS:
     - inputFile:        path of the audio file to process
     - smoothingWindow:  forwarded unchanged to aS.silenceRemoval()
     - weight:           forwarded unchanged to aS.silenceRemoval()

    Every detected segment is stored next to the input file as
    "<input path without extension>_<start>-<end>.wav", where start and end
    are the segment limits in seconds (3 decimals).

    Raises an Exception if inputFile is not an existing file.
    """
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")

    [fs, x] = audioBasicIO.readAudioFile(inputFile)
    # NOTE(review): 0.05, 0.05 are presumably the short-term window and step
    # (seconds) and the trailing True a plot flag -- confirm against
    # aS.silenceRemoval().
    segmentLimits = aS.silenceRemoval(x, fs, 0.05, 0.05, smoothingWindow,
                                      weight, True)

    # Strip the real extension instead of blindly cutting 4 characters, so
    # that names such as "clip.aiff" or "clip" are handled correctly too.
    base_name = os.path.splitext(inputFile)[0]
    for limits in segmentLimits:
        seg_start, seg_end = limits[0], limits[1]
        strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(base_name, seg_start,
                                                    seg_end)
        # Segment limits are in seconds; convert them to sample indices.
        wavfile.write(strOut, fs, x[int(fs * seg_start):int(fs * seg_end)])
def fileRegression(inputFile, model_name, model_type):
    """
    Apply every regression model that shares the prefix model_name to a
    single audio file.

    ARGUMENTS:
     - inputFile:    path of the audio file to process
     - model_name:   common path prefix of the model files; every file
                     matching "<model_name>_*" that does not end in "MEANS"
                     is treated as one regression model
     - model_type:   'svm', 'svm_rbf' or 'randomforest'

    RETURNS:
     - R:                 list with one regressionWrapper() output per model
     - regression_names:  list with the part of each model path that follows
                          its last underscore
    On failure (missing audio file, no model found, model path that is not a
    file, unreadable audio) a message is printed where applicable and the
    3-tuple (-1, -1, -1) is returned instead.

    Raises ValueError for an unsupported model_type.
    """
    if not os.path.isfile(inputFile):
        print("fileRegression: wav file not found!")
        return (-1, -1, -1)

    # Collect the model files; the "...MEANS" companions are not models.
    regression_models = [path for path in glob.glob(model_name + "_*")
                         if path[-5:] != "MEANS"]
    if not regression_models:
        # Without this guard regression_models[0] below raises IndexError.
        print("fileRegression: no regression model found!")
        return (-1, -1, -1)
    regression_names = [path[path.rfind("_") + 1:]
                        for path in regression_models]

    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        # Fail explicitly instead of crashing later on unbound names.
        raise ValueError(
            "fileRegression: unsupported model_type {0!r}".format(model_type))

    # FEATURE EXTRACTION
    # The windowing parameters are taken from the FIRST model only.
    [_, _, _, mt_win, mt_step, st_win, st_step, compute_beat] = \
        load_model(regression_models[0], True)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)
    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)

    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)

    # REGRESSION
    R = []
    for model_path in regression_models:
        if not os.path.isfile(model_path):
            print("fileRegression: input model_name not found!")
            return (-1, -1, -1)
        # Only the model and its normalization statistics are needed here.
        [model, MEAN, STD, _, _, _, _, _] = load_model(model_path, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(regressionWrapper(model, model_type, curFV))
    return R, regression_names
def dirWavFeatureExtractionNoAveraging(dirName, mt_win, mt_step, st_win,
                                       st_step):
    """
    This function extracts the mid-term features of the audio files of a
    particular folder without averaging each file.

    ARGUMENTS:
        - dirName:          the path of the audio directory
        - mt_win, mt_step:  mid-term window and step (in seconds)
        - st_win, st_step:  short-term window and step (in seconds)
    RETURNS:
        - all_mt_feats:     feature matrix, one row per mid-term window,
                            rows of all files stacked vertically
        - signal_idx:       for every row of all_mt_feats, the index (into
                            wav_file_list) of the file it was extracted from
        - wav_file_list:    sorted list of all matching files in dirName,
                            including files that could not be read
    """
    all_mt_feats = numpy.array([])
    signal_idx = numpy.array([])

    types = ('*.wav', '*.aif', '*.aiff', '*.ogg')
    wav_file_list = []
    for pattern in types:
        wav_file_list.extend(glob.glob(os.path.join(dirName, pattern)))
    wav_file_list = sorted(wav_file_list)

    for i, wavFile in enumerate(wav_file_list):
        [fs, x] = audioBasicIO.readAudioFile(wavFile)
        if isinstance(x, int):  # file could not be read -- skip it
            continue

        x = audioBasicIO.stereo2mono(x)
        [mt_term_feats, _, _] = mtFeatureExtraction(x, fs,
                                                    round(mt_win * fs),
                                                    round(mt_step * fs),
                                                    round(fs * st_win),
                                                    round(fs * st_step))
        mt_term_feats = numpy.transpose(mt_term_feats)

        # Label the rows with the real file index. Using zeros for the first
        # readable file would be wrong whenever earlier files were skipped,
        # because the indices refer to the full wav_file_list.
        file_idx = i * numpy.ones((mt_term_feats.shape[0], ))
        if len(all_mt_feats) == 0:
            all_mt_feats = mt_term_feats
            signal_idx = file_idx
        else:
            all_mt_feats = numpy.vstack((all_mt_feats, mt_term_feats))
            signal_idx = numpy.append(signal_idx, file_idx)

    return (all_mt_feats, signal_idx, wav_file_list)
def fileClassification(inputFile, model_name, model_type):
    """
    Classify a single audio file with a previously stored model.

    ARGUMENTS:
     - inputFile:    path of the audio file to classify
     - model_name:   path of the stored model file
     - model_type:   'knn' selects load_model_knn(); any other value selects
                     load_model()

    RETURNS:
     - (Result, P, classNames): the two outputs of classifierWrapper() and
       the class names stored with the model
     - (-1, -1, -1) if the model or the audio file does not exist, if the
       audio could not be read, or if the signal is not longer than one
       mid-term window
    """
    failure = (-1, -1, -1)

    # Validate both paths before touching the model or the audio.
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return failure
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return failure

    # Both loaders return the same 9 fields.
    loader = load_model_knn if model_type == 'knn' else load_model
    (classifier, MEAN, STD, classNames,
     mt_win, mt_step, st_win, st_step, compute_beat) = loader(model_name)

    # Read the audio file and convert it to mono.
    Fs, signal = audioBasicIO.readAudioFile(inputFile)
    signal = audioBasicIO.stereo2mono(signal)
    if isinstance(signal, int):
        # audio file IO problem
        return failure
    duration = signal.shape[0] / float(Fs)
    if duration <= mt_win:
        return failure

    # Feature extraction, followed by long term averaging of the mid-term
    # statistics.
    extracted = aF.mtFeatureExtraction(signal, Fs, mt_win * Fs, mt_step * Fs,
                                       round(Fs * st_win),
                                       round(Fs * st_step))
    mid_term, short_term, _ = extracted
    feature_vector = mid_term.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beatExtraction(short_term, st_step)
        for extra in (beat, beat_conf):
            feature_vector = numpy.append(feature_vector, extra)

    normalized = (feature_vector - MEAN) / STD
    Result, P = classifierWrapper(classifier, model_type, normalized)
    return Result, P, classNames
def mtFeatureExtractionToFile(fileName, midTermSize, midTermStep,
                              shortTermSize, shortTermStep, outPutFile,
                              storeStFeatures=False, storeToCSV=False,
                              PLOT=False):
    """
    Wrapper that:
        a) reads the content of an audio file (converted to mono)
        b) performs mid-term feature extraction on that signal
        c) writes the mid-term feature sequences to a numpy file

    ARGUMENTS:
     - fileName:                      path of the audio file
     - midTermSize, midTermStep:      mid-term window and step, multiplied by
                                      the sampling rate before use
     - shortTermSize, shortTermStep:  short-term window and step, multiplied
                                      by the sampling rate before use
     - outPutFile:                    output path prefix (no extension)
     - storeStFeatures:               also store the short-term features
                                      under "<outPutFile>_st"
     - storeToCSV:                    additionally write ".csv" copies
                                      (transposed matrices)
     - PLOT:                          print a message for every stored file
    """
    sampling_rate, signal = audioBasicIO.readAudioFile(fileName)
    signal = audioBasicIO.stereo2mono(signal)

    # The extraction call does not depend on storeStFeatures; the short-term
    # output is simply left unused when it is not going to be stored.
    mid_term, short_term, _ = mtFeatureExtraction(
        signal, sampling_rate,
        round(sampling_rate * midTermSize),
        round(sampling_rate * midTermStep),
        round(sampling_rate * shortTermSize),
        round(sampling_rate * shortTermStep))

    # Mid-term features: numpy file, optionally CSV.
    numpy.save(outPutFile, mid_term)
    if PLOT:
        print("Mid-term numpy file: " + outPutFile + ".npy saved")
    if storeToCSV:
        numpy.savetxt(outPutFile + ".csv", mid_term.T, delimiter=",")
        if PLOT:
            print("Mid-term CSV file: " + outPutFile + ".csv saved")

    if not storeStFeatures:
        return

    # Short-term features: numpy file, optionally CSV.
    numpy.save(outPutFile + "_st", short_term)
    if PLOT:
        print("Short-term numpy file: " + outPutFile + "_st.npy saved")
    if storeToCSV:
        numpy.savetxt(outPutFile + "_st.csv", short_term.T, delimiter=",")
        if PLOT:
            print("Short-term CSV file: " + outPutFile + "_st.csv saved")
def thumbnailWrapper(inputFile, thumbnailWrapperSize):
    """
    Wrapper that extracts two thumbnails from an audio file with
    aS.musicThumbnailing(), stores them and plots the self-similarity matrix.

    ARGUMENTS:
     - inputFile:             path of the audio file to process
     - thumbnailWrapperSize:  forwarded to aS.musicThumbnailing(); also used
                              to size the ellipse drawn on the plot

    The thumbnails are written with wavfile.write() to
    "<name>_thumb1<ext>" and "<name>_thumb2<ext>", where <ext> is kept for
    ".wav" and ".mp3" inputs and is ".wav" for any other extension.

    Raises an Exception if inputFile is not an existing file. Returns None
    (silently, before doing anything else) if the file could not be read.
    """
    st_window = 0.5
    st_step = 0.5
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")

    [fs, x] = audioBasicIO.readAudioFile(inputFile)
    if fs == -1:  # could not read file
        return

    # A1/A2 and B1/B2 are the limits (in seconds) of the two thumbnails.
    [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x, fs, st_window,
                                                     st_step,
                                                     thumbnailWrapperSize)

    # Build the output names from the real extension. str.replace() would
    # also rewrite ".wav"/".mp3" inside directory names, and any other
    # extension previously left the names undefined (NameError).
    base_name, extension = os.path.splitext(inputFile)
    if extension not in (".wav", ".mp3"):
        extension = ".wav"
    thumbnailWrapperFileName1 = base_name + "_thumb1" + extension
    thumbnailWrapperFileName2 = base_name + "_thumb2" + extension

    # write thumbnailWrappers to WAV files:
    wavfile.write(thumbnailWrapperFileName1, fs, x[int(fs * A1):int(fs * A2)])
    wavfile.write(thumbnailWrapperFileName2, fs, x[int(fs * B1):int(fs * B2)])
    print("1st thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec"
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName1, A1, A2))
    print("2nd thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec"
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName2, B1, B2))

    # Plot self-similarity matrix:
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="auto")
    plt.imshow(Smatrix)

    # Thumbnail limits converted from seconds to frame indices.
    a1_frame, a2_frame = A1 / st_step, A2 / st_step
    b1_frame, b2_frame = B1 / st_step, B2 / st_step
    n_frames = Smatrix.shape[0]

    # Plot best-similarity diagonal:
    Xcenter = (a1_frame + a2_frame) / 2.0
    Ycenter = (b1_frame + b2_frame) / 2.0
    e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                    thumbnailWrapperSize * 1.4, 3, angle=45,
                                    linewidth=3, fill=False)
    ax.add_patch(e1)

    # Dashed guide lines from the thumbnail limits to the matrix border.
    guide_style = dict(color="k", linestyle="--", linewidth=2)
    plt.plot([b1_frame, n_frames], [a1_frame, a1_frame], **guide_style)
    plt.plot([b2_frame, n_frames], [a2_frame, a2_frame], **guide_style)
    plt.plot([b1_frame, b1_frame], [a1_frame, n_frames], **guide_style)
    plt.plot([b2_frame, b2_frame], [a2_frame, n_frames], **guide_style)

    plt.xlim([0, n_frames])
    plt.ylim([Smatrix.shape[1], 0])

    ax.yaxis.set_label_position("right")
    ax.yaxis.tick_right()

    plt.xlabel("frame no")
    plt.ylabel("frame no")
    plt.title("Self-similarity matrix")

    plt.show()
def dirWavFeatureExtraction(dirName, mt_win, mt_step, st_win, st_step,
                            compute_beat=False):
    """
    This function extracts the mid-term features of the audio files of a
    particular folder.

    The resulting feature vector is extracted by long-term averaging the
    mid-term features. Therefore ONE FEATURE VECTOR is extracted for each
    audio file.

    ARGUMENTS:
        - dirName:          the path of the audio directory
        - mt_win, mt_step:  mid-term window and step (in seconds)
        - st_win, st_step:  short-term window and step (in seconds)
        - compute_beat:     if True, the two outputs of beatExtraction() are
                            appended to every feature vector
    RETURNS:
        - all_mt_feats:      the feature vectors (a single 1-D vector when
                             only one file was used, one row per file
                             otherwise, empty array when none was used)
        - wav_file_list2:    the files that contributed a feature vector, in
                             the same order as the rows of all_mt_feats
        - mt_feature_names:  feature names returned by the last call of
                             mtFeatureExtraction() ([] if it was never called)

    Empty files, unreadable files, signals shorter than 0.2 seconds and files
    whose averaged features contain NaN/inf values are skipped.
    """
    # time.clock() was removed in Python 3.8; prefer perf_counter() and only
    # fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock

    all_mt_feats = numpy.array([])
    process_times = []

    types = ('*.wav', '*.aif', '*.aiff', '*.mp3', '*.au', '*.ogg')
    wav_file_list = []
    for pattern in types:
        wav_file_list.extend(glob.glob(os.path.join(dirName, pattern)))
    wav_file_list = sorted(wav_file_list)

    wav_file_list2, mt_feature_names = [], []
    for i, wavFile in enumerate(wav_file_list):
        print("Analyzing file {0:d} of "
              "{1:d}: {2:s}".format(i + 1, len(wav_file_list), wavFile))
        if os.stat(wavFile).st_size == 0:
            print("   (EMPTY FILE -- SKIPPING)")
            continue
        [fs, x] = audioBasicIO.readAudioFile(wavFile)
        if isinstance(x, int):  # file could not be read -- skip it
            continue

        t1 = timer()
        x = audioBasicIO.stereo2mono(x)
        if x.shape[0] < float(fs) / 5:
            print("  (AUDIO FILE TOO SMALL - SKIPPING)")
            continue

        [mt_term_feats, st_features, mt_feature_names] = \
            mtFeatureExtraction(x, fs, round(mt_win * fs),
                                round(mt_step * fs),
                                round(fs * st_win), round(fs * st_step))
        if compute_beat:
            [beat, beat_conf] = beatExtraction(st_features, st_step)

        # long term averaging of mid-term statistics
        mt_term_feats = numpy.transpose(mt_term_feats).mean(axis=0)
        if numpy.isnan(mt_term_feats).any() or \
                numpy.isinf(mt_term_feats).any():
            # Invalid features: the file contributes no row, so it must not
            # be listed among the processed files either.
            continue

        if compute_beat:
            mt_term_feats = numpy.append(mt_term_feats, beat)
            mt_term_feats = numpy.append(mt_term_feats, beat_conf)
        if len(all_mt_feats) == 0:
            # first feature vector
            all_mt_feats = mt_term_feats
        else:
            all_mt_feats = numpy.vstack((all_mt_feats, mt_term_feats))
        # Register the file only once its row has actually been stored, to
        # keep the file list aligned with the feature matrix.
        wav_file_list2.append(wavFile)

        t2 = timer()
        duration = float(len(x)) / fs
        process_times.append((t2 - t1) / duration)

    if len(process_times) > 0:
        print("Feature extraction complexity ratio: "
              "{0:.1f} x realtime".format(
                  (1.0 / numpy.mean(numpy.array(process_times)))))
    return (all_mt_feats, wav_file_list2, mt_feature_names)