def classifyFolderWrapper(inputFolder, model_type, model_name, outputMode=False):
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    types = ('*.wav', '*.aif', '*.aiff', '*.mp3')
    wavFilesList = []
    for files in types:
        wavFilesList.extend(glob.glob(inputFolder + files))
    wavFilesList = sorted(wavFilesList)
    if len(wavFilesList) == 0:
        print("No WAV files found!")
        return
    Results = []
    for wavFile in wavFilesList:
        [Result, P, classNames] = aT.fileClassification(wavFile, model_name, model_type)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print("{0:s}\t{1:s}".format(wavFile, classNames[Result]))
    Results = numpy.array(Results)
    # print the distribution of classes:
    [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames) + 1))
    for i, h in enumerate(Histogram):
        print("{0:20s}\t\t{1:d}".format(classNames[i], h))
def classifyFileWrapper(inputFile, model_type, model_name):
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")
    [Result, P, classNames] = aT.fileClassification(inputFile, model_name, model_type)
    print("{0:s}\t{1:s}".format("Class", "Probability"))
    for i, c in enumerate(classNames):
        print("{0:s}\t{1:.2f}".format(c, P[i]))
    print("Winner class: " + classNames[int(Result)])
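# A minimal usage sketch for the two wrappers above. The paths and the model
# name "svmSM" are hypothetical; this assumes the imports the wrappers rely on
# (os, glob, numpy, and pyAudioAnalysis.audioTrainTest as aT) plus an existing
# trained model file:
classifyFileWrapper("data/doremi.wav", "svm", "svmSM")
classifyFolderWrapper("data/testFolder/", "svm", "svmSM", outputMode=True)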
def Test():
    isSignificant = 0.5  # try different values
    # P: list of probabilities
    Result, P, classNames = aT.fileClassification(fileName, "svmModel", "svm")
    winner = np.argmax(P)  # pick the result with the highest probability value
    print(winner)
    # is the highest value found above the isSignificant threshold?
    print(P[winner])
    if P[winner] > isSignificant:
        print("File: " + fileName + " is in category: " + classNames[winner] +
              ", with probability: " + str(P[winner]))
        print(str(P))
        label2.config(text="FileName: " + str(fileName))
        label3.config(text="Predicted Category: " + str(classNames[winner]))
        label4.config(text="Probability: " + str(P[winner]))
    else:
        print("Can't classify sound: " + str(P))
        tkMessageBox.showinfo("Can't Classify", str(P))
def detect_class(self, audio):
    print 'detect_class'
    print 'save unclassified'
    file_id, path = self.save_unclassified(audio)
    print 'file saved', path
    print 'classify file'
    detected = trainer.fileClassification(
        inputFile=path,
        modelName=self.classifier_path,
        modelType=self.classifier_type,
    )
    print 'file classified'
    # the first element of the result is the winning class id (a float),
    # so cast it to int before using it as an index
    i = int(detected[0])
    res = {
        'id': file_id,
        'class': detected[2][i],
        'probability': detected[1][i]
    }
    print detected
    print res
    return res
# sampling a sine wave programmatically
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

plt.style.use('ggplot')

rate, audio = wavfile.read(filepath)
audio = np.mean(audio, axis=1)  # mix stereo down to mono

# sampling information
Fs = 44100   # sample rate
T = 1 / Fs   # sampling period
t = 0.1      # seconds of sampling

# one-sided FFT (the FFT call itself is implied by the excerpt)
n = len(audio)
fourier = np.fft.fft(audio)
fourier = fourier[0:int(n / 2)]
# scale by the number of points so that the magnitude does not depend on the length
fourier = fourier / float(n)

# calculate the frequency at each point in Hz
freqArray = np.arange(0, (n / 2), 1.0) * (rate * 1.0 / n)

plt.figure(1, figsize=(8, 6))
plt.plot(freqArray / 1000, 10 * np.log10(np.abs(fourier)),
         color='#ff7f00', linewidth=0.02)
plt.xlabel('Frequency (kHz)')
plt.ylabel('Power (dB)')

# ====================================
aT.featureAndTrain(["classifierData/music", "classifierData/speech"], 1.0, 1.0,
                   aT.shortTermWindow, aT.shortTermStep, "svm", "svmSMtemp", False)
aT.fileClassification(audiofile, "svmSMtemp", "svm")
# Result: (0.0, array([0.90156761, 0.09843239]), ['music', 'speech'])
except:
    print("File Not Found!")

while 1:
    buf = conn.recv(1024)
    while buf:
        print("Receiving...")
        f.write(buf)
        buf = conn.recv(1024)
    f.close()
    print("Receiving Done.")
    conn.send("Thank you for sending.")
    break

try:
    [Result, P, classNames] = aT.fileClassification("get_apple.wav", "svmGyungmin", "svm")
    print(Result)
    print(P)
    print(classNames)
except:
    print("Can't find get_apple.wav!")

try:
    conn.send(str(Result))
    conn.send(str(P))
    conn.send(str(classNames))
except:
    print("Can't send results!")

try:
    os.remove("get_apple.wav")
import os
import sys
from pyAudioAnalysis import audioFeatureExtraction as aF
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioBasicIO

if __name__ == '__main__':
    # classify
    Result, P, classNames = aT.fileClassification('Speech/Speech_2.wav', 'svmModelTest', 'svm')
    print Result
    print P
    print classNames
def main(argv): if argv[1] == "-shortTerm": for i in range(nExp): [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") duration = x.shape[0] / float(Fs) t1 = time.clock() F = audioFeatureExtraction.stFeatureExtraction( x, Fs, 0.050 * Fs, 0.050 * Fs) t2 = time.clock() perTime1 = duration / (t2 - t1) print "short-term feature extraction: {0:.1f} x realtime".format( perTime1) elif argv[1] == "-classifyFile": for i in range(nExp): [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") duration = x.shape[0] / float(Fs) t1 = time.clock() aT.fileClassification("snakehit.wav", "svmSM", "svm") t2 = time.clock() perTime1 = duration / (t2 - t1) print "Mid-term feature extraction + classification \t {0:.1f} x realtime".format( perTime1) elif argv[1] == "-mtClassify": for i in range(nExp): [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") duration = x.shape[0] / float(Fs) t1 = time.clock() [flagsInd, classesAll, acc] = aS.mtFileClassification("snakehit.wav", "svmSM", "svm", False, '') t2 = time.clock() perTime1 = duration / (t2 - t1) print "Fix-sized classification - segmentation \t {0:.1f} x realtime".format( perTime1) elif argv[1] == "-hmmSegmentation": for i in range(nExp): [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") duration = x.shape[0] / float(Fs) t1 = time.clock() aS.hmmSegmentation('snakehit.wav', 'hmmRadioSM', False, '') t2 = time.clock() perTime1 = duration / (t2 - t1) print "HMM-based classification - segmentation \t {0:.1f} x realtime".format( perTime1) elif argv[1] == "-silenceRemoval": for i in range(nExp): [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") duration = x.shape[0] / float(Fs) t1 = time.clock() [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav") segments = aS.silenceRemoval(x, Fs, 0.050, 0.050, smoothWindow=1.0, Weight=0.3, plot=False) t2 = time.clock() perTime1 = duration / (t2 - t1) print "Silence removal \t {0:.1f} x realtime".format(perTime1) elif argv[1] == "-thumbnailing": for i in range(nExp): [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav") duration1 = x1.shape[0] / float(Fs1) t1 = time.clock() [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x1, Fs1, 1.0, 1.0, 15.0) # find thumbnail endpoints t2 = time.clock() perTime1 = duration1 / (t2 - t1) print "Thumbnail \t {0:.1f} x realtime".format(perTime1) elif argv[1] == "-diarization-noLDA": for i in range(nExp): [Fs1, x1] = audioBasicIO.readAudioFile("snakehit.wav") duration1 = x1.shape[0] / float(Fs1) t1 = time.clock() aS.speakerDiarization("snakehit.wav", 4, LDAdim=0, PLOT=False) t2 = time.clock() perTime1 = duration1 / (t2 - t1) print "Diarization \t {0:.1f} x realtime".format(perTime1) elif argv[1] == "-diarization-LDA": for i in range(nExp): [Fs1, x1] = audioBasicIO.readAudioFile("snakehit.wav") duration1 = x1.shape[0] / float(Fs1) t1 = time.clock() aS.speakerDiarization("snakehit.wav", 4, PLOT=False) t2 = time.clock() perTime1 = duration1 / (t2 - t1) print "Diarization \t {0:.1f} x realtime".format(perTime1)
def main():
    # def train():
    input('Speak for 5 secs after pressing \'Enter\': ')
    print('\nRecording')
    time.sleep(.5)
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print('\nRecording Saved.')
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open('sounds/' + 'output%d.wav' % FILE_NUMBER, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    # >>>>>> FEATURE EXTRACTION
    [fs, x] = audioBasicIO.readAudioFile('sounds/output%d.wav' % FILE_NUMBER)
    f, f_names = ShortTermFeatures.feature_extraction(x, fs, 0.050 * fs, 0.025 * fs)
    print(f_names)
    print(f)

    # def trainClassifier():
    # >>>>> TRAINING SVM
    aT.featureAndTrain([
        "Male/",
        "Female/",
    ], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svm2Classes")
    aT.fileClassification('sounds/output1.wav', "svm2Classes", "svm")

    # def playAudio():
    # Play audio
    input('To play audio press \'Enter\': ')
    filename = 'sounds/output1.wav'
    wave_obj = sa.WaveObject.from_wave_file(filename)
    play_obj = wave_obj.play()
    play_obj.wait_done()  # Wait until sound has finished playing
    print("Audio has finished playing")

    # def manipulate():
    [fs, x] = audioBasicIO.readAudioFile('sounds/output%d.wav' % FILE_NUMBER)
    f, f_names = ShortTermFeatures.feature_extraction(x, fs, 0.050 * fs, 0.025 * fs)
    input('To manipulate input press \'Enter\': ')
    # Create an array of random numbers to use as the adversarial input
    r = np.random.rand(68, 198)
    print("Adversarial input\n", r)
    # Create an empty array to allow the user to edit any feature they want.
    s = (68, 198)
    e = np.zeros(s)
    print("Empty data\n", e)
    # Print the feature values for the original audio clip
    print("Audio clip\n", f)
    # Multiply the original audio with manipulated data to see if it can misclassify
    m = f * r
    print("Manipulated data\n", m)

    # def plotGraphs():
    # Plotting original input
    plt.subplot(2, 2, 1)
    plt.plot(f[0, :])
    plt.xlabel('Original')
    plt.ylabel(f_names[0])
    # Plotting adversarial input
    plt.subplot(2, 2, 2)
    plt.plot(r[0, :])
    plt.xlabel('Adversarial input')
    # Plotting manipulated data
    plt.subplot(2, 2, 3)
    plt.plot(m[0, :])
    plt.xlabel('Manipulated data')
    plt.show()

    # Convert the manipulated array back into a wav
    librosa.feature.inverse.mfcc_to_audio(m, n_mels=128, dct_type=2,
                                          norm='ortho', ref=1.0, lifter=0)
    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

def record_to_file(path):
    "Records from the microphone and outputs the resulting data to 'path'"
    sample_width, data = record()
    data = pack('<' + ('h' * len(data)), *data)
    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

if __name__ == '__main__':
    print("please play music")
    while True:
        record_to_file('stream.wav')
        result = aT.fileClassification("stream.wav", "svmSMtemp", "svm")
        if result[1][0] > result[1][1]:
            print("Heavy Metal |m|")
        else:
            print("Dance")
from pyAudioAnalysis import audioTrainTest as aT
from sys import argv

models = ["knn", "svm", "gradientboosting", "extratrees", "randomforest"]

for arg in argv[1:]:
    overall_score = 0.0  # running score, reset for each file
    for current_model in models:
        overall_score += aT.fileClassification(arg, current_model + "Model",
                                               current_model)[1][0]
    overall_score /= len(models)
    # print(good_prob)
    print()
    print(arg)
    if overall_score > 0.5:
        print("This is a good file!")
    else:
        print("Bad file")
    print("Avg score: ", overall_score)
from pyAudioAnalysis import audioTrainTest as aT

aT.featureAndTrain([
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/_background_noise_",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/one",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/two",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/three",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/four",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/five",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/six",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/seven",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/eight",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/nine",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/yes",
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/wav/speech_commands/no"
], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svmSMtemp", False)

aT.fileClassification(
    "/home/brandonjabr/pyAudio/pyAudioAnalysis/my-recordings/yes/1529689771.331.wav",
    "svmSMtemp", "svm")
# Combine the results of several analyses
def addReco(recoList):
    res = [0, 0, 0, 0, 0, 0]
    for i in range(len(recoList)):
        n = maxID(recoList[i])
        res[n] += 1
    return res

# Main program
params = UDPparam()
while True:
    recordAudio()
    (_, reco1, _) = at.fileClassification("records/file.wav", "knnTypeWriterSounds", "knn")
    (_, reco2, _) = at.fileClassification("records/file.wav", "svmTypeWriterSounds", "svm")
    (_, reco3, _) = at.fileClassification("records/file.wav", "etTypeWriterSounds", "extratrees")
    (_, reco4, _) = at.fileClassification("records/file.wav", "gbTypeWriterSounds", "gradientboosting")
    (_, reco5, _) = at.fileClassification("records/file.wav", "rfTypeWriterSounds", "randomforest")
    reco = addReco([reco1, reco2, reco3, reco4, reco5])
    print("")
    print(reco1)
    print(reco2)
    print(reco3)
    print(reco4)
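# maxID is not defined in this snippet; presumably it returns the index of the
# largest probability in one model's output. A minimal sketch under that
# assumption:
def maxID(reco):
    return max(range(len(reco)), key=lambda n: reco[n])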
#!/usr/local/bin/python2
from pyAudioAnalysis import audioTrainTest as aT
# print "hello1"
import numpy as np
# print "hello2"
import RPi.GPIO as GPIO
# print "hello3"
import time

isSignificant = 0.8  # TN/FP threshold

GPIO.setmode(GPIO.BOARD)
GPIO.setup(37, GPIO.OUT)

# the `for` needs to be replaced with a `while`
for i in range(1, 10):
    # need to determine a naming system for files + make sure they exist before
    # running the classifier + delete the file at the end of the run
    Result, P, classNames = aT.fileClassification(
        "trainingData/test/drink_1.wav", "knnDE", "knn")
    winner = np.argmax(P)  # pick the result with the highest probability value
    if P[winner] > isSignificant:
        print("File: drink_1.wav is in category: " + classNames[winner] +
              ", with probability: " + str(P[winner]))
        GPIO.output(37, GPIO.HIGH)
    else:
        print("Can't classify sound: " + str(P))
    time.sleep(5)
    GPIO.output(37, GPIO.LOW)
    time.sleep(5)
### Classifies one file with our knn from 'knn_on_emotions.py'
from pyAudioAnalysis import audioTrainTest as aT

print(
    aT.fileClassification(
        "../output_by_emotion/test_input/03-01-01-01-01-01-22.wav",
        "Models/knnEmotion7", "knn"))
#!python2
from sys import argv
import numpy as np
import os
from pyAudioAnalysis import audioTrainTest as aT

script, filedir = argv
filename = os.listdir(filedir)
for i in filename:
    Result, P, classNames = aT.fileClassification(filedir + "/" + i, "svmModel", "svm")
    print(i)
    winner = np.argmax(P)
    win = i + " " + str(P[winner]) + " " + classNames[winner]
    f = open("result.txt", "a")
    f.write(win + "\n")
    f.close()
def classFile(self, file):
    model_file = self.model_file
    classifierType = self.classifierType
    verbose = self.verbose

    added = os.path.getmtime(file)
    added = time.gmtime(added)
    added = time.strftime(
        '\'' + '-'.join(['%Y', '%m', '%d']) + ' ' +
        ':'.join(['%H', '%M', '%S']) + '\'', added)

    cleaner = noiseCleaner(verbose=verbose)
    clean_wav = cleaner.noise_removal(file)

    Result, P, classNames = aT.fileClassification(clean_wav, model_file,
                                                  classifierType)
    if verbose:
        print file
        print Result
        print classNames
        print P, '\n'

    result_dict = {}
    for i in xrange(0, len(classNames)):
        result_dict[classNames[i]] = P[i]
    # sort (class, probability) pairs by descending probability
    result_dict = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)

    with open(file, 'rb') as file_contents:
        sample_id = crc32(file_contents.read())
    device_id = -1  # tbi
    latitude = -1  # tbi
    longitute = -1  # tbi

    file_metadata = MediaInfo.parse(file)
    file_metadata = file_metadata.tracks[0]
    assert file_metadata.track_type == 'General'
    humidity = file_metadata.humi
    temp = file_metadata.temp
    if humidity is None:
        humidity = -1
    else:
        humidity = float(humidity)
    if temp is None:
        temp = -1
    else:
        temp = float(temp)
    light = -1  # tbi

    type1 = '\'' + result_dict[0][0] + '\''
    type2 = '\'' + result_dict[1][0] + '\''
    type3 = '\'' + result_dict[2][0] + '\''
    per1 = result_dict[0][1]
    per2 = result_dict[1][1]
    per3 = result_dict[2][1]

    values = [
        sample_id, device_id, added, latitude, longitute, humidity, temp,
        light, type1, per1, type2, per2, type3, per3
    ]
    values = [str(x) for x in values]

    with MySQLdb.connect(
            host=host, user=user, passwd=passwd,
            db=database) as cur:  # config is in config.py: see above
        query_text = "INSERT INTO sampleInfo (sampleid, deviceid, added, latitude, longitude, humidity, temp, light, type1, per1, type2, per2, type3, per3) values(" + ','.join(values) + ");"
        try:
            cur.execute(query_text)
        except _mysql_exceptions.ProgrammingError, e:
            if e[0] != 1146:
                raise
            else:
                tbl_create()
                cur.execute(query_text)
        except _mysql_exceptions.IntegrityError, e:
            if e[0] != 1062:
                raise
            else:
                sys.stderr.write("Warning: Duplicate key entry.\n")
#!/usr/local/bin/python2
from sys import argv
import numpy as np
from pyAudioAnalysis import audioTrainTest as aT

script, filename = argv
isSignificant = 0.3  # try different values

# Result, P, classNames = aT.fileClassification(filename, "svmModel", "svm")
Result, P, classNames = aT.fileClassification(filename, "svmDTMF", "svm")
winner = np.argmax(P)
if P[winner] > isSignificant:
    print("The category is: " + classNames[winner] +
          ", with probability: " + str(P[winner]))
else:
    print("Can't classify sound: " + str(P))
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS
from os import path
import os
import sys

# Command-line argument 1 is the classifier type (also reused as the model name).
modelName = sys.argv[1]
mtw = float(sys.argv[2])  # Command-line argument 2 is the mid-term window.
mts = float(sys.argv[3])  # Command-line argument 3 is the mid-term step.

aT.featureAndTrain(["SetA/Good/", "SetA/Bad/"], mtw, mts, aT.shortTermWindow,
                   aT.shortTermStep, modelName, "Models/" + modelName)

for fileName in [
        f for f in os.listdir("SetB/Good/")
        if path.isfile(path.join("SetB/Good/", f))
]:
    # fileClassification returns a tuple, so wrap it in str() before concatenating
    print "Classification result for good file: " + fileName + " is " + str(
        aT.fileClassification("SetB/Good/" + fileName, "Models/" + modelName,
                              modelName))

for fileName in [
        f for f in os.listdir("SetB/Bad/")
        if path.isfile(path.join("SetB/Bad/", f))
]:
    print "Classification result for bad file: " + fileName + " is " + str(
        aT.fileClassification("SetB/Bad/" + fileName, "Models/" + modelName,
                              modelName))
#!/usr/local/bin/python2
from sys import argv
import numpy as np
from pyAudioAnalysis import audioTrainTest as aT
import pydub

pydub.AudioSegment.converter = r"/Users/navrajnarula/Desktop/ffmpeg"

script, filename = argv
isSignificant = 0.8  # try different values

# P: list of probabilities
Result, P, classNames = aT.fileClassification(filename, "svmModel", "svm")
print("result is", Result)
print("classNames is", classNames)
print("P is", P)
print("result is", Result)

winner = np.argmax(P)  # pick the result with the highest probability value
# is the highest value found above the isSignificant threshold?
# if P[winner] > isSignificant:
print("File: " + filename + " is in category: " + classNames[winner] +
      ", with probability: " + str(P[winner]))
# else:
#     print("Can't classify sound: " + str(P))
def classify_dir(dir, trained_machine_name, trained_machine_algorithm,
                 file_extension=".wav"):
    """
    Classifies every file within a specified directory and prints / writes results.

    :param dir: Directory whose files (.wav) will be classified
    :param trained_machine_name: The name of the machine that has been trained
    :param trained_machine_algorithm: The type of algorithm used to train the
        machine (knn, svm, extratrees, gradientboosting, randomforest)
    :param file_extension: the type of files being read; defaults to .wav
    :return: void
    """
    # get all files in the directory
    files_in_directory = get_files_in_directory(dir, file_extension)

    # clear the old results file
    with open(trained_machine_algorithm + "-results" + ".txt", "w") as f:
        f.write("")

    # counts the number of correctly predicted emotions
    correct = 0

    # loop through all the files in the directory
    for file in files_in_directory:
        # classify the .wav file
        # dominate_emotion: dominant emotion in the classification
        dominate_emotion, emotion_statistics, emotion_paths = aT.fileClassification(
            file, trained_machine_name, trained_machine_algorithm)
        # make sure dominate_emotion has a tenths place, then convert to string
        # (this is used when finding the key in the EMOTIONS map)
        dominate_emotion = str(format(dominate_emotion, '.1f'))
        # convert to lists
        emotion_statistics = list(emotion_statistics)
        emotion_paths = list(emotion_paths)

        # put the results into a readable format
        dominate_emotion_result, emotions_list_result = extract_results(
            dominate_emotion, emotion_statistics, emotion_paths)

        # extract the expected emotion from the file name
        (modality, vocal_channel, expected_emotion, emotional_intensity,
         statement, repitition, actor) = get_expected_emotion(file)
        # make sure expected_emotion has a tenths place, then convert to string
        # (this is used when finding the key in the EMOTIONS map)
        expected_emotion = str(format(int(expected_emotion), '.1f'))
        expected_emotion = EMOTIONS.get(expected_emotion)

        # keep only the part of the path after the backslash
        (file, file) = file.split('\\')

        with open(trained_machine_algorithm + "-results" + ".txt", "a+") as f:
            print("Expected: " + expected_emotion)
            print(dominate_emotion_result)
            f.write("modality-voiceChannel-emotion-emotionalIntensity-statement-repetition-actor \n\n")
            f.write("1 = neutral, 2 = calm, 3 = happy, 4 = sad, 5 = angry, 6 = fear, 7 = disgust, 8 = surprised \n")
            f.write(file + "\n")
            f.write("File results: " + trained_machine_algorithm + ".txt" + "\n")
            f.write("Expected: " + expected_emotion + "\n")
            f.write(dominate_emotion_result + "\n")
            for emotion in emotions_list_result:
                print(emotion)
                f.write(emotion + "\n")
            f.write("\n")
            if expected_emotion == EMOTIONS.get(dominate_emotion):
                print("here")
                correct += 1

    with open(trained_machine_algorithm + "-results" + ".txt", "a+") as f:
        print("Correct classifications: " + str(correct) + " out of " +
              str(len(files_in_directory)))
        f.write("Correct classifications: " + str(correct) + " out of " +
                str(len(files_in_directory)))
    print("File results: " + trained_machine_algorithm + ".txt")
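# get_files_in_directory, extract_results, get_expected_emotion and the EMOTIONS
# map are not shown in this snippet. A minimal sketch of get_expected_emotion,
# assuming RAVDESS-style names such as "03-01-05-01-02-01-12.wav"
# (modality-vocalChannel-emotion-intensity-statement-repetition-actor):
import os

def get_expected_emotion(file):
    # drop directory and extension, then split the seven dash-separated fields
    name = os.path.splitext(os.path.basename(file))[0]
    return tuple(name.split('-'))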
def get_intro_skip_timeline(filename):
    song = AudioSegment.from_wav(filename)
    segment_start = 0
    segment_end = 5000
    final_end = len(song)
    segment_to_analyze = song[0:len(song)]
    # y1, sr1 = librosa.load('intro.wav')
    speech_count = 1
    prev_music = False
    segment_arr = []
    subchunkarr = []
    while segment_end < final_end:
        chunk = segment_to_analyze[segment_start:segment_end]
        chunk.export("chunk_test.wav", format="wav")
        # y2, sr2 = librosa.load("chunk.wav")
        Result, P, classNames = aT.fileClassification("chunk_test.wav",
                                                      "svmOffice", "svm")
        if P[0] > 0.85:
            print "Time is :", segment_start, " to ", segment_end
            print P[0], classNames[0]
            subchunkarr.append([segment_start, segment_end])
            prev_music = True
        else:
            if prev_music == True:
                segment_arr.append(subchunkarr)
                subchunkarr = []
            prev_music = False
        # if prev_music == True:
        # else:
        #     print classNames[1]
        # plt.subplot(1, 2, 1)
        # mfcc1 = librosa.feature.mfcc(y1, sr1)  # Computing MFCC values
        # librosa.display.specshow(mfcc1)
        # plt.subplot(1, 2, 2)
        # mfcc2 = librosa.feature.mfcc(y2, sr2)
        # librosa.display.specshow(mfcc2)
        # dist, cost, acc_cost, path = dtw(mfcc1.T, mfcc2.T,
        #                                  dist=lambda x, y: np.linalg.norm(x - y, ord=1))
        # print("The normalized distance between the two : ", dist)  # 0 for similar audios
        # if dist < 100:
        #     chunk.export("identifiedintro.wav", format="wav")
        # else:
        #     chunk.export("speech_vids/speechfile_" + str(speech_count) + ".wav", format="wav")
        #     speech_count = speech_count + 1
        segment_start = segment_start + 2500
        segment_end = segment_end + 2500
    if prev_music == True:
        segment_arr.append(subchunkarr)
    print segment_arr
    i = 0
    maxlen = 0
    maxind = 0
    while i < len(segment_arr):
        if len(segment_arr[i]) > maxlen:
            maxlen = len(segment_arr[i])
            maxind = i
        i += 1
    print "Max index is:", maxind
    print "Array is:", segment_arr[maxind]
    print "Intro chunk is from", segment_arr[maxind][0][0], " to ", segment_arr[maxind][-1][1]
    intro_chunk = song[segment_arr[maxind][0][0]:segment_arr[maxind][-1][1]]
    fileDetails = {}
    skipIntro = {}
    fileDetails["fileName"] = filename
    skipIntro["startTime"] = str(segment_arr[maxind][0][0] / 1000)
    skipIntro["endTime"] = str(segment_arr[maxind][-1][1] / 1000)
    fileDetails["metadata"] = skipIntro
    print json.dumps(fileDetails)
    intro_chunk.export("skip_clip_office.wav", format="wav")
from pyAudioAnalysis import audioTrainTest as att

# att.featureAndTrain(['bass', 'house'], 1.0, 1.0, att.shortTermWindow, att.shortTermStep, 'svm', 'genre', True)
print att.fileClassification('wav/0.wav', 'genre', 'svm')
print att.fileClassification('wav/1.wav', 'genre', 'svm')
print att.fileClassification('wav/2.wav', 'genre', 'svm')
print att.fileClassification('wav/3.wav', 'genre', 'svm')
print att.fileClassification('wav/4.wav', 'genre', 'svm')
print att.fileClassification('wav/5.wav', 'genre', 'svm')
print att.fileClassification('wav/6.wav', 'genre', 'svm')
print att.fileClassification('wav/7.wav', 'genre', 'svm')
print att.fileClassification('wav/8.wav', 'genre', 'svm')
print att.fileClassification('wav/9.wav', 'genre', 'svm')
print att.fileClassification('wav/10.wav', 'genre', 'svm')
print att.fileClassification('wav/12.wav', 'genre', 'svm')
print att.fileClassification('wav/11.wav', 'genre', 'svm')
print att.fileClassification('wav/13.wav', 'genre', 'svm')
print att.fileClassification('wav/14.wav', 'genre', 'svm')
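# The fifteen calls above could be collapsed into a loop; note the original
# prints wav/12 before wav/11, which a loop would normalize:
for i in range(15):
    print att.fileClassification('wav/{}.wav'.format(i), 'genre', 'svm')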
from pyAudioAnalysis import audioTrainTest as aT

model_file = 'gradientboostingx0.1x1.0x0.01x0.1'
classifierType = 'gradientboosting'
file = '24_44k.wav'
Result, P, classNames = aT.fileClassification(file, model_file, classifierType)
print Result
print P
print classNames
i = 0
try:
    while True:
        with sf.SoundFile("sciezki/output{}.wav".format(i), mode='x',
                          samplerate=samplerate, channels=2) as file:
            with sd.InputStream(samplerate=samplerate, device=2, channels=2,
                                callback=callback):
                czas = time.time()
                while (time.time() - czas) <= 5:
                    file.write(q.get())
        wynik = aT.fileClassification("sciezki/output{}.wav".format(i),
                                      "svmSMtemp", "svm")
        klasyfikacja = ficzery[int(wynik[0])]
        if wynik[1][0] < 0.5:
            print(klasyfikacja)
            print(i)
            if klasyfikacja in ("alarm", "rakietybaza", "c4", "tlumik",
                                "statek", "helka"):
                client.send(Message(text=klasyfikacja), thread_id='ID',
                            thread_type=ThreadType.GROUP)
            pass
        i = i + 1
except KeyboardInterrupt:
    client.send(Message(text="End Detect"), thread_id='ID',
def classify_genre(filename, chunk_seconds=CHUNK_SECONDS):
    # print filename
    # note: stock pyAudioAnalysis fileClassification takes three arguments
    # (file, model path, model type); the extra chunk_seconds argument here
    # presumably targets a modified version of the library
    return aT.fileClassification(
        filename, os.path.join(APP_STATIC, 'models/svmMusicGenre3'), "svm",
        chunk_seconds)
from pyAudioAnalysis import audioTrainTest as aT aT.featureAndTrain(["classifierData/music","classifierData/speech"], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svmSMtemp", False) aT.fileClassification("data/doremi.wav", "svmSMtemp","svm")
from pyAudioAnalysis import audioTrainTest as aT

# aT.featureAndTrain(["/home/tyiannak/Desktop/MusicGenre/Classical/", "/home/tyiannak/Desktop/MusicGenre/Electronic/", "/home/tyiannak/Desktop/MusicGenre/Jazz/"], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svmMusicGenre3", True)
aT.featureAndTrain(
    ["pyAudioAnalysis/data/bones_music/", "pyAudioAnalysis/data/speech_vids/"],
    1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svmBones", False)
print aT.fileClassification(
    "Intro_obtained_from_longest_common_subsequence.wav", "svmBones", "svm")
                                     file_extension)
        # Split 1 minute of song
        MusicManager.splitAudiofileWithName("{0}".format(file_path), filename,
                                            "".join(file_extension[1:]), 60)
    else:
        print("File not found, retry with another track")

# For each model, print the class probabilities in descending order.
if filename_complete:
    for model in AudioClassifierManager.getAllModels():
        for pT in AudioClassifierManager.getPerTrainProportions():
            model_name = AudioClassifierManager.getModelNameForTypeAndPt(model, pT)
            results = dict()
            if os.path.isfile('./{0}'.format(model_name)):
                print("\nModel: {0}".format(model_name))
                # get the saved model
                _fileClass = aT.fileClassification(filename_complete, model_name, model)
                _valuePositiveInMatrix = np.concatenate(
                    np.argwhere(_fileClass[1] > 0), axis=0)
                for indexGen, gen in enumerate(_fileClass[2]):
                    results[gen] = _fileClass[1][indexGen]
                for key, value in sorted(results.items(),
                                         key=lambda (k, v): (v, k), reverse=True):
                    print "{0}: {1}".format(key, format(value * 100, '.2f'))
    utils.remove_audio_files_on_current_dir()
# Reading the dataset with .wav files
# sr, x = scipy.io.wavfile.read('/home/sarvpsin/Desktop/pyAudioAnalysis/pyAudioAnalysis/Data_mic/gun_shot_wav/102305.wav')

# In[8]:

aT.featureAndTrain([
    "/home/sarvpsin/Desktop/pyAudioAnalysis/pyAudioAnalysis/Data_mic/gun_shot_wav",
    "/home/sarvpsin/Desktop/pyAudioAnalysis/pyAudioAnalysis/Data_mic/car_horn_wav"
], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "SVMTry", False)

# In[12]:

# train test split here
aT.fileClassification(
    "/home/sarvpsin/Desktop/pyAudioAnalysis/pyAudioAnalysis/Data_mic/Handgun_sound_effect_1-youtube-NWezpZms1VA-140-192.wav",
    "SVMTry", "svm")

# In[6]:

# knn
# accuracy and F1_score

# In[7]:

# svm
# accuracy and F1_score

# In[8]:

# random forest
from pyAudioAnalysis import audioTrainTest as aT aT.featureAndTrain([ "/home/brandonjabr/pyAudio/pyAudioAnalysis/my-recordings/highlights", "/home/brandonjabr/pyAudio/pyAudioAnalysis/my-recordings/background" ], 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, "svm", "svmSMtemp", False) aT.fileClassification( "/home/brandonjabr/pyAudio/pyAudioAnalysis/my-recordings/highlights/out-02.wav", "svmSMtemp", "svm")
#!/usr/local/bin/python2
import os
from sys import argv
import numpy as np
import pygame
import time, sys
from pygame import mixer
from pyAudioAnalysis import audioTrainTest

script, filename = argv
pygame.init()
isSignificant = 0.33  # try different values
# print argv

# P: list of probabilities
Result, P, classNames = audioTrainTest.fileClassification(
    filename, "svmSMtemp", "svm")
winner = np.argmax(P)  # pick the result with the highest probability value
print classNames[winner]
# is the highest value found above the isSignificant threshold?
if P[winner] > isSignificant:
    print("File: " + filename + " is in category: " + classNames[winner] +
          ", with probability: " + str(P[winner]))
    path = "msg/" + classNames[winner]
    alert_path = "msg/" + classNames[winner] + "_tamil.wav"
    pygame.mixer.music.load(alert_path)
    pygame.mixer.music.play()
    os.system(
        ' telegram-cli -k server.pub -W -e "send_photo Alert %s" "safe_quit"'
        % (path + ".jpg"))
    # pygame.mixer.music.play()
'''
Single File Classification
'''
import subprocess
from pyAudioAnalysis import audioTrainTest as aT

Result, P, classNames = aT.fileClassification(
    "../../audio-source/wave/a2002011001-e02.wav",
    "pyAudioAnalysis/data/svmMusicGenre6", "svm")
# Result, P, classNames = aT.fileClassification("../../audio-source/wave/noexcuses.wav", "pyAudioAnalysis/data/svmMusicGenre6", "svm")
# print(Result)      # 1.0  class ID
# print(P)           # [0.08675024 0.55253466 0.1331328 0.04243957 0.16545349 0.01968925]  probability estimates
# print(classNames)  # ['Blues', 'Classical', 'Electronic', 'Jazz', 'Rap', 'Rock']

# Command-line use:
# python audioAnalysis.py classifyFile -i <inputFilePath> --model <svm, svm_rbf, knn, extratrees, gradientboosting or randomforest> --classifier <pathToClassifierModel>
# Examples:
# python audioAnalysis.py classifyFile -i bach.wav --model svm --classifier data/svmMusicGenre3
# python audioAnalysis.py classifyFile -i bach.wav --model knn --classifier data/knnMusicGenre3

'''
Folder Classification
'''
# Command-line use examples:
# python audioAnalysis.py classifyFolder -i testFolder/ --model svm --classifier data/svmSM           (only generates frequency counts for each audio class)
# python audioAnalysis.py classifyFolder -i testFolder/ --model svm --classifier data/svmSM --details (also outputs the result of each single WAV file)

# Command-Line
# ex) data/speechEmotion/
# speech 47
print("finished recording") #stop recording stream.stop_stream() stream.close() audio.terminate() waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb') waveFile.setnchannels(CHANNELS) waveFile.setsampwidth(audio.get_sample_size(FORMAT)) waveFile.setframerate(RATE) waveFile.writeframes(b''.join(frames)) waveFile.close() # P: list of probabilities Result, P, classNames = aT.fileClassification(WAVE_OUTPUT_FILENAME, "svmModel", "svm") #winner = np.argmax(P) #pick the result with the highest probability value. print("scream levels: " + str(P[0])) # is the highest value found above the isSignificant threshhold? if P[0] > isSignificant: #execute bash command like "python3 send_sms.py [matchRate (0-1)] subprocess.call(["python3", "send_sms.py", str(P[0])]) print("File: " + WAVE_OUTPUT_FILENAME + " is in category: " + classNames[0] + ", with probability: " + str(P[0])) else: print("Can't classify sound: " + str(P))
# Team Id: 10
# Author List: Vishal Agarwal, Pratham Desai, Darsh Shah and Amal Dani
# Filename: music_vs_speech.py
# Functions: main()
# Global Variables: NONE

from pyAudioAnalysis import audioTrainTest as aT
import subprocess
import time

for i in xrange(1, 100):
    # Recording audio
    subprocess.call(["arecord", "-d", "5", "-D", "plughw:1", "output.wav"])
    # Classifying the recorded file
    outFileName = "./output.wav"
    [Result, P, classNames] = aT.fileClassification(
        outFileName, "./pyAudioAnalysis/data/svmSM", "svm")
    print Result, P, classNames
    # Changing the channel if the probability of speech is > 0.95
    if P[0] > 0.95:
        print("Changing the channel...\n")
        subprocess.call(["sudo", "adb", "shell", "input", "tap", "632", "978"])
        time.sleep(5)
def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "short-term feature extraction: {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aT.fileClassification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Mid-term feature extraction + classification \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [flagsInd, classesAll, acc] = aS.mtFileClassification("diarizationExample.wav", "svmSM", "svm", False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Fix-sized classification - segmentation \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aS.hmmSegmentation('diarizationExample.wav', 'hmmRadioSM', False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "HMM-based classification - segmentation \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            segments = aS.silenceRemoval(x, Fs, 0.050, 0.050, smoothWindow=1.0, Weight=0.3, plot=False)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Silence removal \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x1, Fs1, 1.0, 1.0, 15.0)  # find thumbnail endpoints
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Thumbnail \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, LDAdim=0, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
def classifyFile(myFile):
    # second argument is the model file path (here a model file literally named
    # 'svm'), third argument is the classifier type
    print aT.fileClassification(myFile, 'svm', 'svm')