def extractFeatures(self, eventsList, Fs, snr):
    feature = []
    for event in eventsList:
        frame = event.getData()
        F = audioFeatureExtraction.stFeatureExtraction(
            frame, Fs, self.frameSize * Fs, self.frameStep * Fs)
        raw_feature = F[:self.discard, :].T
        tmp = []
        for j in range(0, raw_feature.shape[1]):
            # compute the median and median absolute deviation of each column
            feature_column = raw_feature[:, j]
            median = np.median(feature_column)
            median_absolute_deviation = np.median(
                np.abs(feature_column - median))
            tmp.append(median)
            tmp.append(median_absolute_deviation)
        tmp.append(event.getTarget())        # add class label
        tmp.append(raw_feature.shape[0])     # add number of frames per signal
        tmp.append(snr)                      # add snr
        tmp.append(event.getId())            # add id
        tmp.append(event.getBackground())    # add background type
        feature.append(tmp)
    return feature
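# A minimal sketch of the median/MAD pooling used above, assuming a toy
# (num_frames x num_features) matrix in place of real stFeatureExtraction
# output; the helper name and shapes are illustrative, not the original API.
import numpy as np


def median_mad_pool(raw_feature):
    """Collapse a (num_frames x num_features) matrix to per-feature
    median and median-absolute-deviation statistics."""
    pooled = []
    for j in range(raw_feature.shape[1]):
        col = raw_feature[:, j]
        med = np.median(col)
        mad = np.median(np.abs(col - med))
        pooled.extend([med, mad])
    return pooled


# Example: 100 frames of 34 short-term features -> 68 pooled statistics
print(len(median_mad_pool(np.random.rand(100, 34))))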
def beatExtractionWrapper(wavFileName, plot):
    if not os.path.isfile(wavFileName):
        raise Exception("Input audio file not found!")
    [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
    F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
    BPM, ratio = aF.beatExtraction(F, 0.050, plot)
    print "Beat: {0:d} bpm ".format(int(BPM))
    print "Ratio: {0:.2f} ".format(ratio)
def beatExtractionWrapper(wav_file, plot):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.readAudioFile(wav_file)
    F, _ = aF.stFeatureExtraction(x, fs, 0.050 * fs, 0.050 * fs)
    bpm, ratio = aF.beatExtraction(F, 0.050, plot)
    print("Beat: {0:d} bpm ".format(int(bpm)))
    print("Ratio: {0:.2f} ".format(ratio))
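# Hedged usage sketch for the wrapper above; "song.wav" is a placeholder
# file name and the modules the wrapper relies on (os, audioBasicIO, aF)
# are imported here the same way the surrounding snippets do.
import os
import audioBasicIO
import audioFeatureExtraction as aF

beatExtractionWrapper("song.wav", plot=False)  # prints BPM and beat ratio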
def musicThumbnailing(x, Fs, shortTermSize=1.0, shortTermStep=0.5,
                      thumbnailSize=10.0):
    '''
    This function detects instances of the most representative part of a
    music recording, also called "music thumbnails". A technique similar to
    the one proposed in [1] is used; however, a wider set of audio features
    is employed instead of chroma features. In particular, the following
    steps are followed:
     - Extract short-term audio features. Typical short-term window size:
       1 second.
     - Compute the self-similarity matrix, i.e. all pairwise similarities
       between feature vectors.
     - Apply a diagonal mask as a moving average filter on the values of the
       self-similarity matrix. The size of the mask is equal to the desired
       thumbnail length.
     - Find the position of the maximum value of the new (filtered)
       self-similarity matrix. The audio segments that correspond to the
       diagonal around that position are the selected thumbnails.

    ARGUMENTS:
     - x:              input signal
     - Fs:             sampling frequency
     - shortTermSize:  window size (in seconds)
     - shortTermStep:  window step (in seconds)
     - thumbnailSize:  desired thumbnail size (in seconds)
    RETURNS:
     - A1: beginning of 1st thumbnail (in seconds)
     - A2: ending of 1st thumbnail (in seconds)
     - B1: beginning of 2nd thumbnail (in seconds)
     - B2: ending of 2nd thumbnail (in seconds)

    USAGE EXAMPLE:
        import audioFeatureExtraction as aF
        [Fs, x] = basicIO.readAudioFile(inputFile)
        [A1, A2, B1, B2] = musicThumbnailing(x, Fs)

    [1] Bartsch, M. A., & Wakefield, G. H. (2005). Audio thumbnailing of
        popular music using chroma-based representations. Multimedia, IEEE
        Transactions on, 7(1), 96-104.
    '''
    x = audioBasicIO.stereo2mono(x)

    # feature extraction:
    stFeatures = aF.stFeatureExtraction(x, Fs, Fs * shortTermSize,
                                        Fs * shortTermStep)

    # self-similarity matrix:
    S = selfSimilarityMatrix(stFeatures)

    # moving average filter along the diagonal:
    M = int(round(thumbnailSize / shortTermStep))
    B = numpy.eye(M, M)
    S = scipy.signal.convolve2d(S, B, 'valid')

    # post-processing (remove main-diagonal elements):
    MIN = numpy.min(S)
    for i in range(S.shape[0]):
        for j in range(S.shape[1]):
            if abs(i - j) < 5.0 / shortTermStep or i > j:
                S[i, j] = MIN

    # find the position of the maximum value:
    [I, J] = numpy.unravel_index(S.argmax(), S.shape)

    # expand the diagonal around the maximum to thumbnail length:
    i1 = I
    i2 = I
    j1 = J
    j2 = J
    while i2 - i1 < M:
        if S[i1 - 1, j1 - 1] > S[i2 + 1, j2 + 1]:
            i1 -= 1
            j1 -= 1
        else:
            i2 += 1
            j2 += 1

    return (shortTermStep * i1, shortTermStep * i2,
            shortTermStep * j1, shortTermStep * j2, S)
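# A minimal sketch of the self-similarity step above. selfSimilarityMatrix is
# part of pyAudioAnalysis, so this stand-alone cosine version is only
# illustrative of the idea, not the library's exact implementation.
import numpy


def cosine_self_similarity(features):
    """features: (num_features x num_frames) matrix, as returned by
    stFeatureExtraction. Returns a (num_frames x num_frames) matrix of
    pairwise cosine similarities between frame feature vectors."""
    V = features.T                                           # one vector per row
    norms = numpy.linalg.norm(V, axis=1, keepdims=True) + 1e-12
    V = V / norms                                            # unit-normalize rows
    return numpy.dot(V, V.T)                                 # all pairwise cosines


# Example: 34 features over 200 frames -> 200 x 200 similarity matrix
S_demo = cosine_self_similarity(numpy.random.rand(34, 200))
print(S_demo.shape)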
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5,
                   plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
     - x:                the input audio signal
     - Fs:               sampling freq
     - stWin, stStep:    window size and step in seconds
     - smoothWindow:     (optional) smoothing window (in seconds)
     - Weight:           (optional) weight factor (0 < Weight < 1);
                         the higher, the more strict
     - plot:             (optional) True if results are to be plotted
    RETURNS:
     - segmentLimits:    list of segment limits in seconds (e.g.
                         [[0.1, 0.9], [1.4, 3.0]] means that the resulting
                         segments are (0.1 - 0.9) and (1.4 - 3.0) seconds)
    '''
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)    # convert to mono
    ShortTermFeatures = aF.stFeatureExtraction(x, Fs, stWin * Fs,
                                               stStep * Fs)

    # Step 2: train a binary SVM classifier of low vs high energy frames
    EnergySt = ShortTermFeatures[1, :]  # keep only the energy sequence (2nd feature)
    E = numpy.sort(EnergySt)            # sort the energy feature values
    L1 = int(len(E) / 10)               # number of frames in the lowest 10%
    T1 = numpy.mean(E[0:L1])            # compute "lower" 10% energy threshold
    T2 = numpy.mean(E[-L1:-1])          # compute "higher" 10% energy threshold
    # get all features that correspond to low / high energy:
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt < T1)[0]]
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt > T2)[0]]
    featuresSS = [Class1.T, Class2.T]   # form the binary classification task
    [featuresNormSS, MEANSS, STDSS] = aT.normalizeFeatures(featuresSS)
    # train the respective SVM probabilistic model (ONSET vs SILENCE):
    SVM = aT.trainSVM(featuresNormSS, 1.0)

    # Step 3: compute onset probability based on the trained SVM
    ProbOnset = []
    for i in range(ShortTermFeatures.shape[1]):             # for each frame
        curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS  # normalize
        # get the SVM probability that the frame belongs to the ONSET class:
        ProbOnset.append(SVM.pred_probability(curFV)[1])
    ProbOnset = numpy.array(ProbOnset)
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)  # smooth

    # Step 4A: detect onset frame indices. Find the probability threshold as
    # a weighted average of the top 10% and the lowest 10% of the values:
    ProbOnsetSorted = numpy.sort(ProbOnset)
    Nt = ProbOnsetSorted.shape[0] / 10
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) +
         Weight * numpy.mean(ProbOnsetSorted[-Nt::]))
    MaxIdx = numpy.where(ProbOnset > T)[0]  # frames that satisfy the threshold

    # Step 4B: group frame indices to onset segments
    i = 0
    timeClusters = []
    segmentLimits = []
    while i < len(MaxIdx):    # for each of the detected onset indices
        curCluster = [MaxIdx[i]]
        if i == len(MaxIdx) - 1:
            break
        while MaxIdx[i + 1] - curCluster[-1] <= 2:
            curCluster.append(MaxIdx[i + 1])
            i += 1
            if i == len(MaxIdx) - 1:
                break
        i += 1
        timeClusters.append(curCluster)
        segmentLimits.append([curCluster[0] * stStep,
                              curCluster[-1] * stStep])

    # Step 5: post process - remove very small segments:
    minDuration = 0.2
    segmentLimits2 = []
    for s in segmentLimits:
        if s[1] - s[0] > minDuration:
            segmentLimits2.append(s)
    segmentLimits = segmentLimits2

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(Fs), 1.0 / Fs)
        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        plt.title('Signal')
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, ProbOnset.shape[0] * stStep, stStep),
                 ProbOnset)
        plt.title('SVM Probability')
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.show()

    return segmentLimits
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
import numpy

print("COUNT 1\n")
[Fs1, x1] = audioBasicIO.readAudioFile("data/practice.wav")
F1 = audioFeatureExtraction.stFeatureExtraction(x1, Fs1, 0.050 * Fs1,
                                                0.025 * Fs1)
# F1[9:21] is a 12-row slice of the (num_features x num_frames) matrix
# (12 x 420 here)
print(len(F1[9:21]), len(F1[9:21][0]))

print("\n\nCOUNT 2\n")
[Fs2, x2] = audioBasicIO.readAudioFile("data/practice2.wav")
F2 = audioFeatureExtraction.stFeatureExtraction(x2, Fs2, 0.050 * Fs2,
                                                0.025 * Fs2)
print(len(F2[9:21]), len(F2[9:21][0]))

# truncate both feature matrices to the shorter recording:
size = min(len(F2[9:21][0]), len(F1[9:21][0]))
print(size, "\n")

print("\n\nCORRCOEF\n")
print(numpy.corrcoef(F1[9:21, 0:size], F2[9:21, 0:size]) * 0.5 + 0.5)

print("\n\nE Distance\n")
print(numpy.linalg.norm(F1[9:21, 0:size] - F2[9:21, 0:size]))
def silenceRemoval(x, fs, st_win, st_step, smoothWindow=0.5, weight=0.5,
                   plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
     - x:                the input audio signal
     - fs:               sampling freq
     - st_win, st_step:  window size and step in seconds
     - smoothWindow:     (optional) smoothing window (in seconds)
     - weight:           (optional) weight factor (0 < weight < 1);
                         the higher, the more strict
     - plot:             (optional) True if results are to be plotted
    RETURNS:
     - seg_limits:       list of segment limits in seconds (e.g.
                         [[0.1, 0.9], [1.4, 3.0]] means that the resulting
                         segments are (0.1 - 0.9) and (1.4 - 3.0) seconds)
    '''
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train a binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature):
    st_energy = st_feats[1, :]
    en = numpy.sort(st_energy)
    # number of frames in the lowest 10% of the total short-term windows:
    l1 = int(len(en) / 10)
    # compute "lower" 10% energy threshold:
    t1 = numpy.mean(en[0:l1]) + 0.000000000000001
    # compute "higher" 10% energy threshold:
    t2 = numpy.mean(en[-l1:-1]) + 0.000000000000001
    # get all features that correspond to low energy:
    class1 = st_feats[:, numpy.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy:
    class2 = st_feats[:, numpy.where(st_energy >= t2)[0]]
    # form the binary classification task and ...
    faets_s = [class1.T, class2.T]
    # ... normalize and train the respective svm probabilistic model
    # (ONSET vs SILENCE):
    [faets_s_norm, means_s, stds_s] = aT.normalizeFeatures(faets_s)
    svm = aT.trainSVM(faets_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    prob_on_set = []
    for i in range(st_feats.shape[1]):    # for each frame
        cur_fv = (st_feats[:, i] - means_s) / stds_s
        # get the svm probability that the frame belongs to the ONSET class:
        prob_on_set.append(svm.predict_proba(cur_fv.reshape(1, -1))[0][1])
    prob_on_set = numpy.array(prob_on_set)
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices. Find the probability threshold as
    # a weighted average of the top 10% and the lowest 10% of the values:
    prog_on_set_sort = numpy.sort(prob_on_set)
    Nt = int(prog_on_set_sort.shape[0] / 10)
    T = (numpy.mean((1 - weight) * prog_on_set_sort[0:Nt]) +
         weight * numpy.mean(prog_on_set_sort[-Nt::]))
    # get the indices of the frames that satisfy the thresholding:
    max_idx = numpy.where(prob_on_set > T)[0]

    # Step 4B: group frame indices to onset segments
    i = 0
    time_clusters = []
    seg_limits = []
    while i < len(max_idx):    # for each of the detected onset indices
        cur_cluster = [max_idx[i]]
        if i == len(max_idx) - 1:
            break
        while max_idx[i + 1] - cur_cluster[-1] <= 2:
            cur_cluster.append(max_idx[i + 1])
            i += 1
            if i == len(max_idx) - 1:
                break
        i += 1
        time_clusters.append(cur_cluster)
        seg_limits.append(
            [cur_cluster[0] * st_step, cur_cluster[-1] * st_step])

    # Step 5: post process - remove very small segments:
    min_dur = 0.2
    seg_limits_2 = []
    for s in seg_limits:
        if s[1] - s[0] > min_dur:
            seg_limits_2.append(s)
    seg_limits = seg_limits_2

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(fs), 1.0 / fs)
        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        plt.title('Signal')
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, prob_on_set.shape[0] * st_step, st_step),
                 prob_on_set)
        plt.title('svm Probability')
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.show()

    return seg_limits
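# Hedged usage sketch for silenceRemoval above; the file name is a
# placeholder, and the helper modules are imported the same way the
# surrounding snippets do. It mirrors the -silenceRemoval CLI branch below.
import audioBasicIO
from scipy.io import wavfile

[fs, x] = audioBasicIO.readAudioFile("recording.wav")
segments = silenceRemoval(x, fs, 0.05, 0.05, smoothWindow=1.0, weight=0.3)
for t0, t1 in segments:
    # write every detected non-silent segment to its own WAV file
    wavfile.write("segment_{0:.2f}-{1:.2f}.wav".format(t0, t1), fs,
                  x[int(fs * t0):int(fs * t1)])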
def main(argv):
    if argv[1] == "-dirMp3toWAV":    # convert mp3 to wav (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."
                return
            if argv[4] not in ["1", "2"]:
                print "Error. Number of output channels must be 1 or 2"
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            useMp3TagsAsNames = True
            audioBasicIO.convertDirMP3ToWav(path, int(argv[3]), int(argv[4]),
                                            useMp3TagsAsNames)
        else:
            print "Error.\nSyntax: " + argv[0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>"

    if argv[1] == "-dirWAVChangeFs":    # change the sampling rate of wav files (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."
                return
            if argv[4] not in ["1", "2"]:
                print "Error. Number of output channels must be 1 or 2"
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            audioBasicIO.convertFsDirWavToWav(path, int(argv[3]),
                                              int(argv[4]))
        else:
            print "Error.\nSyntax: " + argv[0] + " -dirWAVChangeFs <dirName> <sampling Freq> <numOfChannels>"

    elif argv[1] == "-featureExtractionFile":
        # short-term and mid-term feature extraction to files (csv and numpy)
        if len(argv) == 7:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and
                    uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception("Mid-term and short-term window sizes and steps must be numbers!")
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            outFile = wavFileName
            aF.mtFeatureExtractionToFile(wavFileName, mtWin, mtStep, stWin,
                                         stStep, outFile, True, True, True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -featureExtractionFile <wavFileName> <mtWin> <mtStep> <stWin> <stStep>"

    elif argv[1] == "-beatExtraction":
        if len(argv) == 4:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not uT.isNum(argv[3]):
                raise Exception("PLOT must be either 0 or 1")
            if not ((int(argv[3]) == 0) or (int(argv[3]) == 1)):
                raise Exception("PLOT must be either 0 or 1")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
            BPM, ratio = aF.beatExtraction(F, 0.050, int(argv[3]) == 1)
            print "Beat: {0:d} bpm ".format(int(BPM))
            print "Ratio: {0:.2f} ".format(ratio)
        else:
            print "Error.\nSyntax: " + argv[0] + " -beatExtraction <wavFileName> <PLOT (0 or 1)>"

    elif argv[1] == '-featureExtractionDir':
        # same as -featureExtractionFile, in a batch mode
        # (i.e. for each WAV file in the provided path)
        if len(argv) == 7:
            path = argv[2]
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and
                    uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception("Mid-term and short-term window sizes and steps must be numbers!")
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            aF.mtFeatureExtractionToFileDir(path, mtWin, mtStep, stWin,
                                            stStep, True, True, True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -featureExtractionDir <path> <mtWin> <mtStep> <stWin> <stStep>"

    elif argv[1] == '-featureVisualizationDir':
        # visualize the content relationships between recordings in a folder
        if len(argv) == 3:
            if not os.path.isdir(argv[2]):
                raise Exception("Input folder not found!")
            aV.visualizeFeaturesFolder(argv[2], "pca", "")

    elif argv[1] == '-fileSpectrogram':
        # show the spectrogram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            specgram, TimeAxis, FreqAxis = aF.stSpectogram(
                x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -fileSpectrogram <fileName>"

    elif argv[1] == '-fileChromagram':
        # show the chromagram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            specgram, TimeAxis, FreqAxis = aF.stChromagram(
                x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -fileChromagram <fileName>"

    elif argv[1] == "-trainClassifier":    # Segment classifier training (OK)
        if len(argv) > 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            listOfDirs = argv[4:len(argv) - 1]
            modelName = argv[-1]
            aT.featureAndTrain(listOfDirs, 1, 1, aT.shortTermWindow,
                               aT.shortTermStep, method.lower(), modelName,
                               computeBEAT=beatFeatures)
        else:
            print "Error.\nSyntax: " + argv[0] + " -trainClassifier <method(svm or knn)> <beat features> <directory 1> <directory 2> ... <directory N> <modelName>"

    elif argv[1] == "-trainRegression":    # Segment regression model
        if len(argv) == 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            dirName = argv[4]
            modelName = argv[5]
            aT.featureAndTrainRegression(dirName, 1, 1, aT.shortTermWindow,
                                         aT.shortTermStep, method.lower(),
                                         modelName, computeBEAT=beatFeatures)
        else:
            print "Error.\nSyntax: " + argv[0] + " -trainRegression <method(svm or knn)> <beat features> <directory> <modelName>"

    elif argv[1] == "-classifyFile":    # Single file classification (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            [Result, P, classNames] = aT.fileClassification(inputFile,
                                                            modelName,
                                                            modelType)
            print "{0:s}\t{1:s}".format("Class", "Probability")
            for i, c in enumerate(classNames):
                print "{0:s}\t{1:.2f}".format(c, P[i])
            print "Winner class: " + classNames[int(Result)]
        else:
            print "Error.\nSyntax: " + argv[0] + " -classifyFile <method(svm or knn)> <modelName> <fileName>"

    elif argv[1] == "-regressionFile":    # Single file regression (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            R, regressionNames = aT.fileRegression(inputFile, modelName,
                                                   modelType)
            for i in range(len(R)):
                print "{0:s}\t{1:.3f}".format(regressionNames[i], R[i])
        else:
            print "Error.\nSyntax: " + argv[0] + " -regressionFile <method(svm or knn)> <modelName> <fileName>"

    elif argv[1] == "-classifyFolder":    # Directory classification (OK)
        if len(argv) == 6 or len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            if len(argv) == 6:
                outputMode = argv[5]
            else:
                outputMode = "0"
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if outputMode not in ["0", "1"]:
                raise Exception("outputMode has to be 0 or 1")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                strFilePattern = inputFolder + files
            wavFilesList = []
            wavFilesList.extend(glob.glob(strFilePattern))
            wavFilesList = sorted(wavFilesList)
            if len(wavFilesList) == 0:
                print "No WAV files found!"
                return
            Results = []
            for wavFile in wavFilesList:
                [Result, P, classNames] = aT.fileClassification(wavFile,
                                                                modelName,
                                                                modelType)
                Result = int(Result)
                Results.append(Result)
                if outputMode == "1":
                    print "{0:s}\t{1:s}".format(wavFile, classNames[Result])
            Results = numpy.array(Results)
            # print the distribution of classes:
            [Histogram, _] = numpy.histogram(
                Results, bins=numpy.arange(len(classNames) + 1))
            for i, h in enumerate(Histogram):
                print "{0:20s}\t\t{1:d}".format(classNames[i], h)
        else:
            print "Error.\nSyntax: " + argv[0] + " -classifyFolder <method(svm or knn)> <modelName> <folderName> <outputMode(0 or 1)>"

    elif argv[1] == "-regressionFolder":
        # Regression applied on the WAV files of a folder
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                strFilePattern = inputFolder + files
            wavFilesList = []
            wavFilesList.extend(glob.glob(strFilePattern))
            wavFilesList = sorted(wavFilesList)
            if len(wavFilesList) == 0:
                print "No WAV files found!"
                return
            Results = []
            for wavFile in wavFilesList:
                R, regressionNames = aT.fileRegression(wavFile, modelName,
                                                       modelType)
                Results.append(R)
            Results = numpy.array(Results)
            # plot a histogram of the predicted values per regression task:
            for i, r in enumerate(regressionNames):
                [Histogram, bins] = numpy.histogram(Results[:, i])
                centers = (bins[0:-1] + bins[1::]) / 2.0
                plt.subplot(len(regressionNames), 1, i + 1)
                plt.plot(centers, Histogram)
                plt.title(r)
            plt.show()
        else:
            print "Error.\nSyntax: " + argv[0] + " -regressionFolder <method(svm or knn)> <modelName> <folderName>"

    elif argv[1] == '-trainHMMsegmenter_fromfile':
        if len(argv) == 7:
            wavFile = argv[2]
            gtFile = argv[3]
            hmmModelName = argv[4]
            if not uT.isNum(argv[5]):
                print "Error: mid-term window size must be float!"
                return
            if not uT.isNum(argv[6]):
                print "Error: mid-term window step must be float!"
                return
            mtWin = float(argv[5])
            mtStep = float(argv[6])
            if not os.path.isfile(wavFile):
                print "Error: wavfile does not exist!"
                return
            if not os.path.isfile(gtFile):
                print "Error: groundtruth does not exist!"
                return
            aS.trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep)
        else:
            print "Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromfile <wavFilePath> <gtSegmentFilePath> <hmmModelFileName> <mtWin> <mtStep>"

    elif argv[1] == '-trainHMMsegmenter_fromdir':
        if len(argv) == 6:
            dirPath = argv[2]
            hmmModelName = argv[3]
            if not uT.isNum(argv[4]):
                print "Error: mid-term window size must be float!"
                return
            if not uT.isNum(argv[5]):
                print "Error: mid-term window step must be float!"
                return
            mtWin = float(argv[4])
            mtStep = float(argv[5])
            aS.trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep)
        else:
            print "Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromdir <dirPath> <hmmModelFileName> <mtWin> <mtStep>"

    elif argv[1] == "-segmentClassifyFileHMM":
        # HMM-based segmentation-classification
        if len(argv) == 4:
            hmmModelName = argv[2]
            wavFile = argv[3]
            gtFile = wavFile.replace('.wav', '.segments')
            aS.hmmSegmentation(wavFile, hmmModelName, PLOT=True,
                               gtFileName=gtFile)
        else:
            print "Error.\nSyntax: " + argv[0] + " -segmentClassifyFileHMM <hmmModelName> <fileName>"

    elif argv[1] == '-segmentClassifyFile':
        # Segmentation-classification (fixed-size segments using knn or svm)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputWavFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputWavFile):
                raise Exception("Input audio file not found!")
            gtFile = inputWavFile.replace('.wav', '.segments')
            aS.mtFileClassification(inputWavFile, modelName, modelType, True,
                                    gtFile)
        else:
            print "Error.\nSyntax: " + argv[0] + " -segmentClassifyFile <method(svm or knn)> <modelName> <fileName>"

    elif argv[1] == "-segmentationEvaluation":
        if len(argv) == 5:
            methodName = argv[2]
            modelName = argv[3]
            dirName = argv[4]
            aS.evaluateSegmentationClassificationDir(dirName, modelName,
                                                     methodName)
        else:
            print "Error.\nSyntax: " + argv[0] + " -segmentationEvaluation <method(svm or knn)> <modelName> <directoryName>"

    elif argv[1] == "-silenceRemoval":
        if len(argv) == 5:
            inputFile = argv[2]
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            smoothingWindow = float(argv[3])
            weight = float(argv[4])
            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio signal
            segmentLimits = aS.silenceRemoval(x, Fs, 0.05, 0.05,
                                              smoothingWindow, weight,
                                              False)  # get onsets
            for i, s in enumerate(segmentLimits):
                strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(
                    inputFile[0:-4], s[0], s[1])
                wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
        else:
            print "Error.\nSyntax: " + argv[0] + " -silenceRemoval <inputFile> <smoothingWindow(secs)> <Threshold Weight>"

    elif argv[1] == '-speakerDiarization':
        # speaker diarization (from file): TODO
        inputFile = argv[2]
        nSpeakers = int(argv[3])
        useLDA = (int(argv[4]) == 1)
        if useLDA:
            aS.speakerDiarization(inputFile, nSpeakers, PLOT=True)
        else:
            aS.speakerDiarization(inputFile, nSpeakers, LDAdim=0, PLOT=True)

    elif argv[1] == "-speakerDiarizationScriptEval":
        dir = argv[2]
        listOfLDAs = [int(l) for l in argv[3::]]
        aS.speakerDiarizationEvaluateScript(dir, listOfLDAs)

    elif argv[1] == '-thumbnail':    # music thumbnailing (OK)
        if len(argv) == 4:
            inputFile = argv[2]
            stWindow = 1.0
            stStep = 1.0
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read file
            if Fs == -1:    # could not read file
                return
            try:
                thumbnailSize = float(argv[3])
            except ValueError:
                print "Thumbnail size must be a float (in seconds)"
                return
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
                x, Fs, stWindow, stStep, thumbnailSize)  # thumbnail endpoints
            # write thumbnails to WAV files:
            thumbnailFileName1 = inputFile.replace(".wav", "_thumb1.wav")
            thumbnailFileName2 = inputFile.replace(".wav", "_thumb2.wav")
            wavfile.write(thumbnailFileName1, Fs, x[int(Fs * A1):int(Fs * A2)])
            wavfile.write(thumbnailFileName2, Fs, x[int(Fs * B1):int(Fs * B2)])
            print "1st thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName1, A1, A2)
            print "2nd thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName2, B1, B2)
            # Plot self-similarity matrix:
            fig = plt.figure()
            ax = fig.add_subplot(111, aspect='auto')
            plt.imshow(Smatrix)
            # Plot best-similarity diagonal:
            Xcenter = (A1 / stStep + A2 / stStep) / 2.0
            Ycenter = (B1 / stStep + B2 / stStep) / 2.0
            e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                            thumbnailSize * 1.4, 3, angle=45,
                                            linewidth=3, fill=False)
            ax.add_patch(e1)
            plt.plot([B1, Smatrix.shape[0]], [A1, A1], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B2, Smatrix.shape[0]], [A2, A2], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B1, B1], [A1, Smatrix.shape[0]], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B2, B2], [A2, Smatrix.shape[0]], color='k',
                     linestyle='--', linewidth=2)
            plt.xlim([0, Smatrix.shape[0]])
            plt.ylim([Smatrix.shape[1], 0])
            ax.yaxis.set_label_position("right")
            ax.yaxis.tick_right()
            plt.xlabel('frame no')
            plt.ylabel('frame no')
            plt.title('Self-similarity matrix')
            plt.show()
        else:
            print "Error.\nSyntax: " + argv[0] + " -thumbnail <filename> <thumbnailsize(seconds)>"
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt
import audioTrainTest as aT

plot = False

[Fs, x] = audioBasicIO.readAudioFile("data/Heavy.wav")
F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.025 * Fs)

# ZCR: the rate of sign-changes of the signal within a particular frame.
if plot:
    plt.subplot(2, 2, 1)
    plt.plot(F[0, :])
    plt.xlabel('Frame no')
    plt.ylabel('ZCR')
    plt.subplot(2, 2, 2)
    plt.plot(F[1, :])
    plt.xlabel('Frame no')
    plt.ylabel('Energy')
    plt.subplot(2, 2, 3)
    plt.plot(F[2, :])
    plt.xlabel('Frame no')
    plt.ylabel('Entropy of Energy')
    plt.subplot(2, 2, 4)
    plt.plot(F[3, :])
    plt.xlabel('Frame no')
    plt.ylabel('Spectral Centroid')
    plt.show()

Result, P, classNames = aT.fileClassification("data/Heavy.wav",
                                              "data/svmMusicGenre3", "svm")
print Result
import os
import random as rn
import math
import operator
import numpy as np
import audioBasicIO
import audioFeatureExtraction

dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angry"
c = 1
for filename in os.walk(dire):
    for x in filename[2]:
        label1 = []
        label = []
        file = filename[0] + "\\" + str(x)
        [Fs, x] = audioBasicIO.readAudioFile(file)
        F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.50 * Fs,
                                                       0.25 * Fs)
        # label every frame of this file with class 3 ("angry")
        for i in range(len(F[0])):
            label1.append(3)
        label.append(label1)
        # append the label row to the feature matrix and save as csv
        G = np.append(F, label, axis=0)
        loc = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\angrycsv\an" + str(c) + ".csv"
        c = c + 1
        np.savetxt(loc, G, delimiter=",")

dire = r"G:\5th sem\ee320 DSP\project\csv\newdata\dataset\testdata\sad"
c = 1
for filename in os.walk(dire):
    for x in filename[2]:
        label = []
height = 20
k = 0
col, avg, med, std, maxm, minm = [], [], [], [], [], []

# dirWav is a project-specific helper that loads all wav files in a folder
emo_labels, signal_data, filename = dirWav(
    '/media/shreya/New Volume1/datasets/EMO-DB/wav/', '*.wav')
output = np.asarray(emo_labels)

features = []
feat = []
feature = []
length_features = []
for i in signal_data:
    temp = audioFeatureExtraction.stFeatureExtraction(i, 16000, 1024, 1024)
    print 'temp', temp.shape
    # per-file statistics over the short-term feature sequences:
    avg.append(temp.mean(axis=1))
    med.append(np.median(temp, axis=1))
    std.append(np.std(temp, axis=1))
    maxm.append(np.amax(temp, axis=1))
    minm.append(np.amin(temp, axis=1))

mean = np.asarray(avg)
median = np.asarray(med)
maximum = np.asarray(maxm)
minimum = np.asarray(minm)
standard_deviation = np.asarray(std)


def reduce_zeroOneNorm(arr):
#!/usr/bin/env python2.7
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt

[Fs_x, x] = audioBasicIO.readAudioFile("emer/1.wav")
x = audioBasicIO.stereo2mono(x)
F_x = audioFeatureExtraction.stFeatureExtraction(x, Fs_x, 0.050 * Fs_x,
                                                 0.025 * Fs_x)

[Fs_y, y] = audioBasicIO.readAudioFile("nonemer/9.wav")
y = audioBasicIO.stereo2mono(y)
F_y = audioFeatureExtraction.stFeatureExtraction(y, Fs_y, 0.050 * Fs_y,
                                                 0.025 * Fs_y)

# compare the zero-crossing rate of the two recordings:
plt.subplot(2, 1, 1)
plt.plot(F_x[0, :])
plt.xlabel('emer')
plt.ylabel('ZCR')
plt.subplot(2, 1, 2)
plt.plot(F_y[0, :])
plt.xlabel('nonemer')
plt.ylabel('ZCR')
plt.show()

# compare the short-term energy of the two recordings:
plt.subplot(2, 1, 1)
plt.plot(F_x[1, :])
plt.xlabel('emer')
plt.ylabel('Energy')
plt.subplot(2, 1, 2)
plt.plot(F_y[1, :])
plt.xlabel('nonemer')
def analyzeFeatures(self, data):
    # stFeatureExtraction returns the feature matrix and the feature names
    values, features = aF.stFeatureExtraction(data, self.Fs, 0.1 * self.Fs,
                                              0.1 * self.Fs)
    for index, feature in enumerate(features):
        # print each feature name with its corresponding value sequence
        print("%s:\t%s" % (feature, values[index]))
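# A minimal sketch of the (matrix, names) pairing assumed above, using the
# same bare module imports as the surrounding snippets; "example.wav" is a
# placeholder file name.
import audioBasicIO
import audioFeatureExtraction as aF

[fs, data] = audioBasicIO.readAudioFile("example.wav")
vals, names = aF.stFeatureExtraction(data, fs, 0.1 * fs, 0.1 * fs)
for name, row in zip(names, vals):
    # one row of frame-wise values per named feature
    print("%s:\tmean=%.4f" % (name, row.mean()))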
import time

import audioBasicIO
import audioFeatureExtraction
import audioTrainTest as aT
import audioSegmentation as aS

nExp = 4  # number of repetitions per benchmark (assumed; set as needed)


def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            F = audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("short-term feature extraction: "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aT.fileClassification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Mid-term feature extraction + classification \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [flagsInd, classesAll, acc] = aS.mtFileClassification(
                "diarizationExample.wav", "svmSM", "svm", False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Fix-sized classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aS.hmmSegmentation('diarizationExample.wav', 'hmmRadioSM',
                               False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("HMM-based classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            segments = aS.silenceRemoval(
                x, Fs, 0.050, 0.050, smoothWindow=1.0, Weight=0.3,
                plot=False)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Silence removal \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
                x1, Fs1, 1.0, 1.0, 15.0)  # find thumbnail endpoints
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Thumbnail \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, LDAdim=0,
                                  PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
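# Hedged invocation sketch for the benchmark above; "-shortTerm" is one of
# the modes handled by main(), and the referenced wav/model files are
# assumed to exist in the working directory.
import sys

if __name__ == '__main__':
    main(sys.argv if len(sys.argv) > 1 else ['benchmark.py', '-shortTerm'])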