# Shared imports assumed by the snippets below (each snippet comes from a
# separate script and originally carried its own imports):
import sys
import wave
import numpy
import numpy as np
import scipy.io.wavfile
import Vokaturi


def extractEmotionsFromAudioFile(frames, params):
    emotions = {}
    print("Reading sound file...")  # Test
    # Write the raw frames out as a WAV file so scipy can read them back.
    waveFile = wave.open("audio.wav", 'wb')
    waveFile.setnchannels(int(params['channels']))
    waveFile.setsampwidth(params['sampwidth'])
    waveFile.setframerate(params['rate'])
    waveFile.writeframes(b''.join(frames))
    waveFile.close()

    (sampleRate, samples) = scipy.io.wavfile.read("audio.wav")
    bufferLen = len(samples)
    cBuffer = Vokaturi.SampleArrayC(bufferLen)
    if samples.ndim == 1:
        cBuffer[:] = samples[:] / 32768.0  # mono
    else:
        cBuffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo

    voice = Vokaturi.Voice(sampleRate, bufferLen)
    voice.fill(bufferLen, cBuffer)

    print("Extracting emotions from VokaturiVoice...")  # Test
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        emotions["neutral"] = emotionProbabilities.neutrality
        emotions["happiness"] = emotionProbabilities.happiness
        emotions["sadness"] = emotionProbabilities.sadness
        emotions["angry"] = emotionProbabilities.anger
        emotions["fear"] = emotionProbabilities.fear
    voice.destroy()
    return emotions
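# Nearly every snippet in this section repeats the same int16-to-float
# conversion before handing samples to Vokaturi. A minimal helper sketch that
# factors it out (the name samples_to_c_buffer is ours, not part of the
# Vokaturi API):
def samples_to_c_buffer(samples):
    """Convert int16 PCM samples (mono or stereo) to a Vokaturi C sample array."""
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo: average the two channels
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    return buffer_length, c_buffer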
def get_sentiment(sample_rate, samples):
    print('Sample rate %.3f Hz' % sample_rate)
    print('Allocating Vokaturi sample array...')
    buffer_length = len(samples)
    print('%d samples, %d channels' % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    print('Creating VokaturiVoice...')
    # Create the voice before entering the try block so that `voice` is
    # guaranteed to exist when the finally clause runs.
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    try:
        voice.fill(buffer_length, c_buffer)
        quality = Vokaturi.Quality()
        emotionProbabilities = Vokaturi.EmotionProbabilities()
        voice.extract(quality, emotionProbabilities)
        if quality.valid:
            sentiments = {
                'neutral': emotionProbabilities.neutrality,
                'happy': emotionProbabilities.happiness,
                'sad': emotionProbabilities.sadness,
                'angry': emotionProbabilities.anger,
                'fearful': emotionProbabilities.fear,
            }
            print('Sentiments: {}'.format(sentiments))
            return sentiments
        return None  # recording quality too low to extract emotions
    finally:
        voice.destroy()
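# A hypothetical caller for get_sentiment(), assuming the library has already
# been loaded with Vokaturi.load(); "speech.wav" is a placeholder path:
(rate, samples) = scipy.io.wavfile.read("speech.wav")
sentiments = get_sentiment(rate, samples)
if sentiments is not None:
    print(max(sentiments, key=sentiments.get))  # name of the strongest emotion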
def GetVoiceEmo():
    # get fresh samples from the (module-level) PyAudio stream
    data = stream.read(CHUNKSIZE)
    samples = np.frombuffer(data, dtype=np.int16)  # np.fromstring is deprecated
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    # initialise voice with sample rate and size
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    # assign current recorded voice into voice var
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emoProb = Vokaturi.EmotionProbabilities()
    # get probabilities
    voice.extract(quality, emoProb)
    # destroy voice before returning
    voice.destroy()
    if quality.valid:
        print("Neutral: %.3f" % emoProb.neutrality)
        print("Happy: %.3f" % emoProb.happiness)
        print("Sad: %.3f" % emoProb.sadness)
        print("Angry: %.3f" % emoProb.anger)
        print("Fear: %.3f" % emoProb.fear)
        return [emoProb.neutrality, emoProb.happiness, emoProb.sadness,
                emoProb.anger, emoProb.fear]
    else:
        print("Not enough sonorancy to determine emotions")
        print()
        return [0, 0, 0, 0, 0]
def getProbabilities(blob_name):
    local_file = "/tmp/" + blob_name
    wav_file = "/tmp/" + blob_name + ".wav"
    _blob = bucket.blob(blob_name)
    _blob.download_to_filename(local_file)
    # Convert the downloaded file to WAV so it can be read below.
    subprocess.call(['ffmpeg', '-i', local_file, wav_file])
    (sample_rate, samples) = wav_read(wav_file)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    data = {}
    if quality.valid:
        data["neutral"] = emotionProbabilities.neutrality
        data["happiness"] = emotionProbabilities.happiness
        data["sadness"] = emotionProbabilities.sadness
        data["anger"] = emotionProbabilities.anger
        data["fear"] = emotionProbabilities.fear
    else:
        data["error"] = "Quality Too Low"
    voice.destroy()
    subprocess.Popen(['rm', local_file, wav_file])  # clean up the temp files
    return data
def analyzeEmotion(self, filePath):
    try:
        (sample_rate, samples) = wavfile.read(filePath)
        print(" sample rate %.3f Hz" % sample_rate)
        buffer_length = len(samples)
        c_buffer = Vokaturi.SampleArrayC(buffer_length)
        if samples.ndim == 1:  # mono
            c_buffer[:] = samples[:] / 32768.0
        else:  # stereo
            c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
        voice = Vokaturi.Voice(sample_rate, buffer_length)
        voice.fill(buffer_length, c_buffer)
        quality = Vokaturi.Quality()
        emotionProbabilities = Vokaturi.EmotionProbabilities()
        voice.extract(quality, emotionProbabilities)
        result = filePath
        if quality.valid:
            # Append the five probabilities as semicolon-separated fields.
            result += ";%.3f;%.3f;%.3f;%.3f;%.3f" % (
                emotionProbabilities.neutrality,
                emotionProbabilities.happiness,
                emotionProbabilities.sadness,
                emotionProbabilities.anger,
                emotionProbabilities.fear)
        else:
            print("Not enough sonorancy to determine emotions")
        voice.destroy()
        return result
    except Exception as e:  # avoid a bare except; report which file failed and why
        print(filePath, e)
def vokaturi_analyze(buf, sockname, cur_time):
    sample_rate = 44100
    samples = np.frombuffer(buf, dtype=np.int16)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("====================================================")
        print(cur_time, "Vokaturi results from " + sockname)
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        print("====================================================")
    voice.destroy()
def analyzeFile(filepath, lineNum):
    print("in analyzeFile 1")
    Vokaturi.load("./lib/Vokaturi_mac.so")
    (sample_rate, samples) = scipy.io.wavfile.read(filepath)
    print("in analyzeFile 2")
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("in analyzeFile 3")
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    print("in analyzeFile 4")
    if quality.valid:
        emotions = [
            emotionProbabilities.happiness,
            emotionProbabilities.sadness,
            emotionProbabilities.anger,
            emotionProbabilities.fear,
            emotionProbabilities.neutrality
        ]
        if max(emotions) == emotionProbabilities.happiness:
            maxEmotion = "Happy"
        elif max(emotions) == emotionProbabilities.sadness:
            maxEmotion = "Sad"
        elif max(emotions) == emotionProbabilities.anger:
            maxEmotion = "Angry"
        elif max(emotions) == emotionProbabilities.neutrality:
            maxEmotion = "Neut"
        else:
            maxEmotion = "Afraid"
        stats = ("Happy: %.3f\tSad: %.3f\tAngry %.3f\tFear %.3f\tNeut %.3f"
                 % (emotions[0], emotions[1], emotions[2], emotions[3], emotions[4]))
        print("in analyzeFile 5")
        emotionFile = open("emotions", 'a')
        print("in analyzeFile 6")
        writeEmotions(emotionFile, maxEmotion + " " + stats, lineNum)
        print("in analyzeFile 7")
        emotionFile.close()
        print("in analyzeFile 8")
    else:
        print("Not enough sonorancy to determine emotions")
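# The elif chain in analyzeFile() picks the dominant emotion by comparing
# max(emotions) against each field in turn; the dict-plus-max pattern used
# elsewhere in this section is a more compact equivalent. A fragment reusing
# the emotionProbabilities variable from inside analyzeFile():
probs = {
    "Happy": emotionProbabilities.happiness,
    "Sad": emotionProbabilities.sadness,
    "Angry": emotionProbabilities.anger,
    "Afraid": emotionProbabilities.fear,
    "Neut": emotionProbabilities.neutrality,
}
maxEmotion = max(probs, key=probs.get)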
def voice_analysis():
    print("Loading library...")
    Vokaturi.load("../SIM/libs/OpenVokaturi-2-1/lib/Vokaturi_mac.so")
    print("Analyzed by: %s" % Vokaturi.versionAndLicense())

    print("Reading sound file...")
    file_name = "demo.wav"
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    print(" sample rate %.3f Hz" % sample_rate)

    print("Allocating Vokaturi sample array...")
    buffer_length = len(samples)
    print(" %d samples, %d channels" % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0

    print("Creating VokaturiVoice...")
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("Filling VokaturiVoice with samples...")
    voice.fill(buffer_length, c_buffer)

    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        # Publish the probabilities to the queue (credentials are masked in the
        # source; media_exchange and voice_queue are module-level kombu objects).
        with Connection('amqp://*****:*****@localhost:5672//') as conn:
            producer = conn.Producer(serializer='json')
            producer.publish(
                {
                    'Neutral': format(emotionProbabilities.neutrality, '.3f'),
                    'Happy': format(emotionProbabilities.happiness, '.3f'),
                    'Sad': format(emotionProbabilities.sadness, '.3f'),
                    'Angry': format(emotionProbabilities.anger, '.3f'),
                    'Fear': format(emotionProbabilities.fear, '.3f')
                },
                exchange=media_exchange,
                routing_key='voice',
                declare=[voice_queue])
    else:
        print("Not enough sonorancy to determine emotions")
    voice.destroy()
def emotion_recognition(speech):
    (sample_rate, samples) = scipy.io.wavfile.read(speech)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    # extracting emotions from speech
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        # The probabilities are only meaningful when quality.valid is true,
        # so the dominant emotion is computed inside this branch.
        emotions = {
            'Neutral': emotionProbabilities.neutrality,
            'Happy': emotionProbabilities.happiness,
            'Sad': emotionProbabilities.sadness,
            'Angry': emotionProbabilities.anger,
            'Fear': emotionProbabilities.fear
        }
        emotion = max(emotions, key=emotions.get)
        value = emotions[emotion]
        print("\n\nEmotion:\n %s with probability %.3f" % (emotion, value))
    else:
        print("Can't determine emotions")
    voice.destroy()
def vokatori_fun(file_name):
    Vokaturi.load(
        "C:/Users/gokhalea/HackathonCodes/OpenVokaturi-3-0a/OpenVokaturi-3-0a/lib/open/win/OpenVokaturi-3-0-win64.dll"
    )
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        print(
            "______________________________________________________________________________________________"
        )
        voice.destroy()  # free the voice on this path too, before returning
        return emotionProbabilities
    else:
        print("Not enough sonorancy to determine emotions")
        voice.destroy()
        return None
def getEmotionFromWav(filePath):
    emotionDict = dict()
    if not os.path.exists(filePath):
        return
    # reading sound file
    (sample_rate, samples) = scipy.io.wavfile.read(filePath)
    print(" sample rate %.3f Hz" % sample_rate)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        # round to three decimals for the returned dictionary
        emotionDict["neutrality"] = float("{:.3f}".format(emotionProbabilities.neutrality))
        emotionDict["happiness"] = float("{:.3f}".format(emotionProbabilities.happiness))
        emotionDict["sadness"] = float("{:.3f}".format(emotionProbabilities.sadness))
        emotionDict["anger"] = float("{:.3f}".format(emotionProbabilities.anger))
        emotionDict["fear"] = float("{:.3f}".format(emotionProbabilities.fear))
    voice.destroy()
    return emotionDict
def callVokaturi(fileName):
    # Loading Vokaturi Mac
    Vokaturi.load("/Users/nchao/Desktop/Yale Hacks/lib/Vokaturi_mac.so")
    # Reading sound files (.wav)
    file_name = "/Users/nchao/Desktop/Yale Hacks/" + fileName
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    # Allocating Vokaturi sample array
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    # Creating VokaturiVoice and filling it with voice sample
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    # Extracting emotions from Vokaturi (note: quality.valid is never checked,
    # so the probabilities may be meaningless for very short or silent files)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    emoDict = {
        "Neutral": emotionProbabilities.neutrality,
        "Happy": emotionProbabilities.happiness,
        "Sad": emotionProbabilities.sadness,
        "Angry": emotionProbabilities.anger,
        "Fear": emotionProbabilities.fear
    }
    # Finding the main emotion(s) in the voice file: keep every emotion within
    # 1.5 standard deviations of the maximum (the `emotions` list is currently
    # unused; the full dictionary is returned instead)
    sortedVals = sorted(emoDict.values())[::-1]
    stdDev = numpy.std(sortedVals)
    emotions = []
    for percentage in sortedVals:
        if percentage > abs(max(sortedVals) - 1.5 * stdDev):
            emotions += [key for key, val in emoDict.items() if val == percentage]
    voice.destroy()
    return emoDict
def analyze(file_name):
    Vokaturi.load("../lib/Vokaturi_linux64.so")
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    enabled = True
    if quality.valid:
        value = emotionProbabilities.anger  # only the anger probability is used
    else:
        value = 0
        enabled = False
    voice.destroy()
    return enabled, value
def sentiment_analysis(path=None):
    # sys.argv[1] as a default argument would be evaluated at import time,
    # so resolve it lazily instead
    if path is None:
        path = sys.argv[1]
    print("Loading library...")
    Vokaturi.load("./OpenVokaturi-3-0-linux64.so")
    print("Analyzed by: %s" % Vokaturi.versionAndLicense())
    print("Reading sound file...")
    file_name = path
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    print(" sample rate %.3f Hz" % sample_rate)
    print("Allocating Vokaturi sample array...")
    buffer_length = len(samples)
    print(" %d samples, %d channels" % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    print("Creating VokaturiVoice...")
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("Filling VokaturiVoice with samples...")
    voice.fill(buffer_length, c_buffer)
    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    out_dict = {}
    if quality.valid:  # the probabilities are only defined when quality is valid
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        out_dict['neutral'] = emotionProbabilities.neutrality
        out_dict['happy'] = emotionProbabilities.happiness
        out_dict['sad'] = emotionProbabilities.sadness
        out_dict['angry'] = emotionProbabilities.anger
        out_dict['fear'] = emotionProbabilities.fear
    voice.destroy()
    return out_dict
def extract_emotions(file_path):
    Vokaturi.load(get_vokaturi_lib())
    (sample_rate, samples) = scipy.io.wavfile.read(file_path)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    emotions = {}
    # guard against invalid quality: the probabilities are undefined otherwise
    if quality.valid:
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
        emotions = {
            'neutrality': "%.3f" % emotionProbabilities.neutrality,
            'happiness': "%.3f" % emotionProbabilities.happiness,
            'sadness': "%.3f" % emotionProbabilities.sadness,
            'anger': "%.3f" % emotionProbabilities.anger,
            'fear': "%.3f" % emotionProbabilities.fear
        }
    voice.destroy()
    return emotions
def vokalculate(self, soundArr, samplerate):
    '''
    Calculates the emotionality of a sound sample.
    :param double[] soundArr: array containing the sound
    :param int samplerate: sample rate of the sound to process
    :return: dictionary containing each emotion's probability in [0, 1],
             a string with a log, and a boolean (true if the calculation worked)
    '''
    error = "Starting"
    buffer_size = len(soundArr)
    c_buffer = Vokaturi.SampleArrayC(buffer_size)
    c_buffer[:] = soundArr[:]
    self.voice = Vokaturi.Voice(samplerate, buffer_size)
    self.voice.fill(buffer_size, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    self.voice.extract(quality, emotionProbabilities)
    success = bool(quality.valid)
    if quality.valid:
        error = error + "\n SUCCESS!"
    else:
        error = error + "\n Not enough sonorancy to determine emotions"
    return {
        "Neutral": emotionProbabilities.neutrality,
        "Happy": emotionProbabilities.happiness,
        "Sad": emotionProbabilities.sadness,
        "Angry": emotionProbabilities.anger,
        "Fear": emotionProbabilities.fear,
        "Error": error,
        "Success": success
    }
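# Unlike the other snippets, vokalculate() expects samples already scaled to
# [-1, 1], so a caller must do the int16 division itself. A hypothetical usage
# ("clip.wav" and `analyzer` are placeholders for a real file and the object
# hosting the method):
(rate, samples) = scipy.io.wavfile.read("clip.wav")
if samples.ndim == 2:  # stereo: mix down to mono first
    samples = 0.5 * (samples[:, 0] + samples[:, 1])
result = analyzer.vokalculate(samples / 32768.0, rate)
print(result["Success"], result["Error"])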
def analyze(file):
    """Computes EmotionProbabilities from the provided wave file."""
    global decibel
    (sample_rate, samples) = scipy.io.wavfile.read(file)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    ep = Vokaturi.EmotionProbabilities()
    voice.extract(quality, ep)
    # Fall back to all-zero probabilities when the quality or loudness gate fails.
    a = (ep if quality.valid and MAX_LOUDNESS + decibel > params.MIN_LOUDNESS
         else Vokaturi.EmotionProbabilities(0, 0, 0, 0, 0))
    # mavg/show/set_color/get_color are module-level helpers defined elsewhere.
    k, b = mavg(a)
    show(k, b)
    set_color(get_color(b))
    voice.destroy()
def analyzeAudio(file):
    (sample_rate, samples) = scipy.io.wavfile.read(file)
    print(" sample rate %.3f Hz" % sample_rate)
    print("Allocating Vokaturi sample array...")
    buffer_length = len(samples)
    print(" %d samples, %d channels" % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    print("Creating VokaturiVoice...")
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("Filling VokaturiVoice with samples...")
    voice.fill(buffer_length, c_buffer)
    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        # myjson is a module-level list collecting one result per file
        myjson.append([
            {"Neutral": emotionProbabilities.neutrality},
            {"Happy": emotionProbabilities.happiness},
            {"Sad": emotionProbabilities.sadness},
            {"Angry": emotionProbabilities.anger},
            {"Fear": emotionProbabilities.fear}
        ])
    else:
        print("Not enough sonorancy to determine emotions")
    voice.destroy()
def analyzeAudio(filename):
    print("Loading library...")
    Vokaturi.load("lib/OpenVokaturi-3-3/lib/open/win/OpenVokaturi-3-3-win64.dll")
    print("Analyzed by: %s" % Vokaturi.versionAndLicense())
    print("Reading sound file...")
    (sample_rate, samples) = scipy.io.wavfile.read(filename)
    print(" sample rate %.3f Hz" % sample_rate)
    print("Allocating Vokaturi sample array...")
    buffer_length = len(samples)
    print(" %d samples, %d channels" % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    print("Creating VokaturiVoice...")
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("Filling VokaturiVoice with samples...")
    voice.fill(buffer_length, c_buffer)
    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("Neutral: %.3f" % emotionProbabilities.neutrality)
        print("Happy: %.3f" % emotionProbabilities.happiness)
        print("Sad: %.3f" % emotionProbabilities.sadness)
        print("Angry: %.3f" % emotionProbabilities.anger)
        print("Fear: %.3f" % emotionProbabilities.fear)
    voice.destroy()
    return emotionProbabilities
def analyze_emotions(file_name):
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    print(" sample rate %.3f Hz" % sample_rate)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        result = (float(emotionProbabilities.neutrality),
                  float(emotionProbabilities.happiness),
                  float(emotionProbabilities.sadness),
                  float(emotionProbabilities.anger),
                  float(emotionProbabilities.fear))
    else:
        result = (0.0, 0.0, 0.0, 0.0, 0.0)
    voice.destroy()  # free the native voice object on both paths
    return result
def emotion_recognition(speech):
    # import library Vokaturi
    sys.path.append("../api")
    Vokaturi.load("../lib/Vokaturi_mac.so")
    (sample_rate, samples) = scipy.io.wavfile.read(speech)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    # extracting emotions from speech
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    emotion, percent = None, 0.0
    if quality.valid:
        # EmotionProbabilities is a plain struct with five fields and no
        # .max attribute, so the dominant emotion is computed explicitly.
        probs = {
            'Neutral': emotionProbabilities.neutrality,
            'Happy': emotionProbabilities.happiness,
            'Sad': emotionProbabilities.sadness,
            'Angry': emotionProbabilities.anger,
            'Fear': emotionProbabilities.fear,
        }
        emotion = max(probs, key=probs.get)
        percent = probs[emotion]
    voice.destroy()
    return emotion, percent
def detectEmotion(self):
    sample_rate = VocalEmotionEstimatorEngine.RATE
    # audio2send is assumed to hold raw byte chunks; join them before decoding
    samples = np.frombuffer(b''.join(self.audio2send), dtype=np.int16)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        self.emotion = [
            emotionProbabilities.neutrality,
            emotionProbabilities.happiness,
            emotionProbabilities.sadness,
            emotionProbabilities.anger,
            emotionProbabilities.fear
        ]
        print(self.emotion)
    voice.destroy()
def listen_me():
    global text, duration
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', '-f', default='recording.wav')
    args = parser.parse_args()

    # prepare the Vokaturi library
    Vokaturi.load("/home/pi/lib/piZero.so")

    # initialise the Cloud Speech, Natural Language, and TTS clients
    client = CloudSpeechClient()
    nlp_client = language.LanguageServiceClient()
    tts_client = texttospeech.TextToSpeechClient()

    pos_wavs = []
    neut_wavs = []
    neg_wavs = []
    intro_wavs = []
    pos_wavs.append(text_to_audio(tts_client, '진짜?', '0.wav'))
    pos_wavs.append(text_to_audio(tts_client, '대박', '1.wav'))
    pos_wavs.append(text_to_audio(tts_client, '우와', '2.wav'))
    pos_wavs.append(text_to_audio(tts_client, '하하', '3.wav'))
    neut_wavs.append(text_to_audio(tts_client, '응', '10.wav'))
    neut_wavs.append(text_to_audio(tts_client, '그렇구나', '11.wav'))
    neut_wavs.append(text_to_audio(tts_client, '그래서?', '12.wav'))
    neut_wavs.append(text_to_audio(tts_client, '응응', '13.wav'))
    neg_wavs.append(text_to_audio(tts_client, '저런', '4.wav'))
    neg_wavs.append(text_to_audio(tts_client, '힘내', '5.wav'))
    neg_wavs.append(text_to_audio(tts_client, '에휴', '6.wav'))
    intro_wavs.append(text_to_audio(tts_client, '들어줄게. 얘기해봐', 'intro0.wav'))
    intro_wavs.append(text_to_audio(tts_client, '무슨 일 이야?', 'intro1.wav'))
    play_wav(random.choice(intro_wavs))

    logging.basicConfig(level=logging.INFO)
    with Board() as board:
        while True:
            print('Speak now.')
            text = None
            duration = 0.
            emotion = None

            def wait():
                global text, duration
                start = time.monotonic()
                while text is None:
                    # recognise speech as text
                    text = client.recognize(language_code='ko-KR')
                    duration = time.monotonic() - start

            # record while waiting for recognition
            record_file(AudioFormat.CD, filename=args.filename, wait=wait,
                        filetype='wav')
            print(text)
            print('Recorded: %.02f seconds' % duration)
            if text in ['들어줘서 고마워', '내 얘기 들어줘서 고마워', '어시스턴트', '잘가', '잘 가']:
                return

            # text sentiment analysis
            document = types.Document(content=text,
                                      type=enums.Document.Type.PLAIN_TEXT)
            sentiment = nlp_client.analyze_sentiment(
                document=document).document_sentiment
            print('Text sentiment analysis*********************************')
            print('Text: {}'.format(text))
            print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

            ##################### can be tuned after experimenting ####################
            pos_standard = 0.6
            neg_standard = 0.1
            # magnitude_standard = 0.1

            # text sentiment analysis is enough
            if sentiment.score < neg_standard or sentiment.score > pos_standard:
                if sentiment.score < neg_standard:
                    emotion = False
                    print("@@@negative")
                else:
                    emotion = True
                    print("@@@positive")
            else:
                # sentiment analysis of the recorded audio
                print('Audio sentiment analysis*********************************')
                (sample_rate, samples) = scipy.io.wavfile.read(args.filename)
                buffer_length = len(samples)
                print(" %d samples, %d channels" % (buffer_length, samples.ndim))
                c_buffer = Vokaturi.SampleArrayC(buffer_length)
                if samples.ndim == 1:  # mono
                    c_buffer[:] = samples[:] / 32768.0
                else:  # stereo
                    c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
                voice = Vokaturi.Voice(sample_rate, buffer_length)
                voice.fill(buffer_length, c_buffer)
                quality = Vokaturi.Quality()
                emotionProbabilities = Vokaturi.EmotionProbabilities()
                voice.extract(quality, emotionProbabilities)
                if quality.valid:
                    # ignore fear for this score
                    wave_score = emotionProbabilities.happiness - (
                        emotionProbabilities.sadness + emotionProbabilities.anger)
                    if wave_score > 0 and sentiment.score > 0.4:
                        print('@@@positive')
                        emotion = True
                    elif wave_score < 0 and sentiment.score < 0.4:
                        print('@@@negative')
                        emotion = False
                    # when the text score and wave score disagree, react
                    # neutrally (emotion = None)

            # reactions from here on
            with Leds() as leds:
                if emotion is True:
                    play_wav(random.choice(pos_wavs))
                    leds.pattern = Pattern.blink(100)
                    color = (255, 255, 0)
                    leds.update(Leds.rgb_pattern(color))
                    time.sleep(1)
                    # play_wav('laugh.wav')
                elif emotion is False:
                    play_wav(random.choice(neg_wavs))
                    leds.pattern = Pattern.breathe(1000)
                    color = (102, 140, 255)
                    leds.update(Leds.rgb_on(color))
                    time.sleep(1)
                    # play_wav('people-cheering.wav')
                else:
                    # neutral reaction
                    play_wav(random.choice(neut_wavs))
                    leds.pattern = Pattern.blink(5)
                    color = (230, 0, 115)
                    leds.update(Leds.rgb_on(color))
                    time.sleep(1)
"\n please input the name of your next file, type 'done' when finished: " ) files.append(file_name) fileCount += 1 #calculates emotion probabilites from files[] - the last entry # for i in range(fileCount - 1): print("Calculating...") print('-- ' + files[i]) file_name = files[i] (sample_rate, samples) = scipy.io.wavfile.read(file_name) buffer_length = len(samples) c_buffer = Vokaturi.SampleArrayC(buffer_length) if samples.ndim == 1: # mono c_buffer[:] = samples[:] / 32768.0 else: # stereo c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0 voice = Vokaturi.Voice(sample_rate, buffer_length) voice.fill(buffer_length, c_buffer) soundQuality = Vokaturi.Quality() emoProbs = Vokaturi.EmotionProbabilities() voice.extract(soundQuality, emoProbs) if soundQuality.valid: emoNeut += emoProbs.neutrality
def processRequest(req):
    # Obtain info from the query in Dialogflow
    result = req.get("queryResult")
    parameters = result.get("parameters")
    folder_name = parameters.get("FolderType")
    # verify credentials to use the Google Drive API & get the API client
    service = authentication()
    # verify credentials to use the Google Sheets API & get the sheet
    wks = open_gsheet()
    # check for file in drive
    (file_name, file_id) = get_wav_file(folder_name, service)
    if not file_name:  # if None, there is no matching file
        return {"fulfillmentText": "No such file in drive"}
    request = service.files().get_media(fileId=file_id)
    # downloads binary data of the wav file and stores it in a buffered stream
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
    # parse buffered stream into Vokaturi (TO-DO: FIX BYTE CONVERSION --
    # fh holds a whole WAV file, not raw samples; see the sketch after this
    # function)
    buffer_length = fh.getbuffer().nbytes
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    c_buffer[:] = fh.getvalue()
    voice = Vokaturi.Voice(8000, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    # Output data from Vokaturi & save to the Google sheet
    output = "The results of the analysis of " + file_name + " is... "
    row = 1  # assumption: `row` was defined outside this snippet; scan from the top
    if quality.valid:
        output += 'Neutral: %.5f, ' % emotionProbabilities.neutrality
        output += 'Happiness: %.5f, ' % emotionProbabilities.happiness
        output += 'Sadness: %.5f, ' % emotionProbabilities.sadness
        output += 'Anger: %.5f, ' % emotionProbabilities.anger
        output += 'Fear: %.5f' % emotionProbabilities.fear
        # advance to the first empty row, then write one row of results
        while wks.cell(row, 1).value != "":
            row += 1
        wks.update_cell(row, 1, file_name)
        wks.update_cell(row, 2, '%0.5f' % emotionProbabilities.neutrality)
        wks.update_cell(row, 3, '%0.5f' % emotionProbabilities.happiness)
        wks.update_cell(row, 4, '%0.5f' % emotionProbabilities.sadness)
        wks.update_cell(row, 5, '%0.5f' % emotionProbabilities.anger)
        wks.update_cell(row, 6, '%0.5f' % emotionProbabilities.fear)
    else:
        output += "Not enough sonorancy to determine emotions"
    voice.destroy()
    return {"fulfillmentText": output}
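# For the TO-DO above: the BytesIO holds a complete WAV file (header included),
# so its bytes cannot be copied straight into the sample array, and the
# hard-coded 8000 Hz is a guess. One sketch of the conversion, assuming scipy
# is available as in the other snippets here (scipy.io.wavfile.read accepts
# file-like objects); the helper name wav_stream_to_voice is ours:
def wav_stream_to_voice(fh):
    fh.seek(0)  # rewind the stream after the download
    (sample_rate, samples) = scipy.io.wavfile.read(fh)
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[:] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[:, 0] + samples[:, 1]) / 32768.0
    voice = Vokaturi.Voice(sample_rate, buffer_length)  # real rate, not 8000
    voice.fill(buffer_length, c_buffer)
    return voice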
def listen_me():
    global text, duration
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', '-f', default='recording.wav')
    args = parser.parse_args()

    # prepare the Vokaturi library
    Vokaturi.load("/home/pi/lib/piZero.so")

    # initialise the Cloud Speech and Natural Language clients
    client = CloudSpeechClient()
    nlp_client = language.LanguageServiceClient()

    logging.basicConfig(level=logging.INFO)
    with Board() as board:
        while True:
            print('Speak now.')
            text = None
            duration = 0.
            emotion = None

            def wait():
                global text, duration
                start = time.monotonic()
                while text is None:
                    # recognise speech as text
                    text = client.recognize(language_code='ko-KR')
                    duration = time.monotonic() - start

            # record while waiting for recognition
            record_file(AudioFormat.CD, filename=args.filename, wait=wait,
                        filetype='wav')
            print(text)
            print('Recorded: %.02f seconds' % duration)
            if text in ['들어줘서 고마워', '내 얘기 들어줘서 고마워', '어시스턴트', '잘가', '잘 가']:
                return

            # text sentiment analysis
            document = types.Document(content=text,
                                      type=enums.Document.Type.PLAIN_TEXT)
            sentiment = nlp_client.analyze_sentiment(
                document=document).document_sentiment
            print('Text sentiment analysis*********************************')
            print('Text: {}'.format(text))
            print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

            ##################### can be tuned after experimenting ####################
            pos_standard = 0.6
            neg_standard = 0.1
            # magnitude_standard = 0.1

            # text sentiment analysis is enough
            if sentiment.score < neg_standard or sentiment.score > pos_standard:
                if sentiment.score < neg_standard:
                    emotion = False
                    print("@@@negative")
                else:
                    emotion = True
                    print("@@@positive")
            else:
                # sentiment analysis of the recorded audio
                print('Audio sentiment analysis*********************************')
                (sample_rate, samples) = scipy.io.wavfile.read(args.filename)
                buffer_length = len(samples)
                print(" %d samples, %d channels" % (buffer_length, samples.ndim))
                c_buffer = Vokaturi.SampleArrayC(buffer_length)
                if samples.ndim == 1:  # mono
                    c_buffer[:] = samples[:] / 32768.0
                else:  # stereo
                    c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
                voice = Vokaturi.Voice(sample_rate, buffer_length)
                voice.fill(buffer_length, c_buffer)
                quality = Vokaturi.Quality()
                emotionProbabilities = Vokaturi.EmotionProbabilities()
                voice.extract(quality, emotionProbabilities)
                if quality.valid:
                    # ignore fear for this score
                    wave_score = emotionProbabilities.happiness - (
                        emotionProbabilities.sadness + emotionProbabilities.anger)
                    if wave_score > 0:
                        print('@@@positive')
                        emotion = True
                    else:
                        print('@@@negative')
                        emotion = False

            # when the text analysis was ambiguous and the wave analysis failed
            # (usually because the clip was too short)
            if emotion is None:
                print('please say again')  # or a neutral reaction could go here
                continue

            # reactions from here on
            with Leds() as leds:
                if emotion is True:
                    # tts.say('I am glad to hear that.')
                    # tts.say('진짜? 대박.')
                    leds.pattern = Pattern.blink(100)
                    color = (255, 255, 0)
                    leds.update(Leds.rgb_pattern(color))
                    time.sleep(1)
                    # play_wav('laugh.wav')
                else:
                    # tts.say('I am sorry to hear that.')
                    # tts.say('저런. 힘내.')
                    leds.pattern = Pattern.breathe(1000)
                    color = (102, 140, 255)
                    leds.update(Leds.rgb_on(color))
                    time.sleep(1)
def displayProfile(self):
    self.NameEntry.focus()
    self.NameEntry.delete(0, END)
    # self.NameEntry.insert(0, "Hi Mitali")

    self.WPMEntry.focus()
    self.WPMEntry.delete(0, END)
    self.Audio_Length_Secs = 150  # librosa.get_duration(filename='microphone-results.wav')
    self.Audio_Length_Mins = self.Audio_Length_Secs / 60
    self.WPM.set(len(self.wordsList) / self.Audio_Length_Mins)
    self.WPMEntry.insert(0, "You are able to speak %.2f words per min" % self.WPM.get())

    self.AccuracyEntry.focus()
    self.AccuracyEntry.delete(0, END)
    # WER = wer(self.sample_text, self.message, standardize=True)
    WER, displayList = self.werCustom(self.sample_text, self.message)
    formattedText = " ".join(displayList)
    displayText = (formattedText.replace('#!<sb>!#', '').replace('#!<se>!#', '')
                   .replace('#!<db>!#', '').replace('#!<de>!#', '')
                   .replace('#!<ib>!#', '').replace('#!<ie>!#', '')
                   .replace('#!<sbe>!#', ''))
    self.text.insert(INSERT, displayText)
    # self.text.insert(INSERT, formattedText)

    # Locate the substitution/deletion/insertion markers in the tagged text.
    sbindexes = [m.start() for m in re.finditer('#!<sb>!#', formattedText)]
    seindexes = [m.start() for m in re.finditer('#!<se>!#', formattedText)]
    dbindexes = [m.start() for m in re.finditer('#!<db>!#', formattedText)]
    deindexes = [m.start() for m in re.finditer('#!<de>!#', formattedText)]
    ibindexes = [m.start() for m in re.finditer('#!<ib>!#', formattedText)]
    ieindexes = [m.start() for m in re.finditer('#!<ie>!#', formattedText)]

    sub = ""
    Del = ""
    Ins = ""
    for i in range(len(sbindexes)):
        # Count how many markers precede this one so the offsets into the
        # stripped displayText line up (each begin/end marker pair is 16 chars).
        sbText = re.findall(r'#!<sb>!#', formattedText[0:sbindexes[i]])
        dbText = re.findall(r'#!<db>!#', formattedText[0:sbindexes[i]])
        ibText = re.findall(r'#!<ib>!#', formattedText[0:sbindexes[i]])
        sbCount = len(sbText) + len(dbText) + len(ibText)
        b = sbindexes[i] - (sbCount * 16)
        e = seindexes[i] - ((sbCount * 16) + 8)
        t = displayText[seindexes[i] - ((sbCount * 16) + 8):]
        tagWord = displayText[sbindexes[i] - (sbCount * 16):seindexes[i] - ((sbCount * 16) + 8)]
        endIndex = self.text.search(t, INSERT)
        startIndex = endIndex + ('-%dc' % len(tagWord))
        sub = sub + "," + tagWord + "_" + str(e) + "-" + startIndex + ":" + endIndex
        self.text.tag_add("substitutes", startIndex, endIndex)  # tag and select found string
        self.text.focus()  # select text widget itself
    # self.lblSub['text'] = sub

    for i in range(len(dbindexes)):
        sbText = re.findall(r'#!<sb>!#', formattedText[0:dbindexes[i]])
        dbText = re.findall(r'#!<db>!#', formattedText[0:dbindexes[i]])
        ibText = re.findall(r'#!<ib>!#', formattedText[0:dbindexes[i]])
        sbCount = len(sbText) + len(dbText) + len(ibText)
        b = dbindexes[i] - (sbCount * 16)
        e = deindexes[i] - ((sbCount * 16) + 8)
        t = displayText[deindexes[i] - ((sbCount * 16) + 8):]
        tagWord = displayText[dbindexes[i] - (sbCount * 16):deindexes[i] - ((sbCount * 16) + 8)]
        endIndex = self.text.search(t, INSERT)
        startIndex = endIndex + ('-%dc' % len(tagWord))
        Del = Del + "," + startIndex + ":" + endIndex
        self.text.tag_add("deletions", startIndex, endIndex)

    for i in range(len(ibindexes)):
        sbText = re.findall(r'#!<sb>!#', formattedText[0:ibindexes[i]])
        dbText = re.findall(r'#!<db>!#', formattedText[0:ibindexes[i]])
        ibText = re.findall(r'#!<ib>!#', formattedText[0:ibindexes[i]])
        sbCount = len(sbText) + len(dbText) + len(ibText)
        b = ibindexes[i] - (sbCount * 16)
        e = ieindexes[i] - ((sbCount * 16) + 8)
        t = displayText[ieindexes[i] - ((sbCount * 16) + 8):]
        tagWord = displayText[ibindexes[i] - (sbCount * 16):ieindexes[i] - ((sbCount * 16) + 8)]
        endIndex = self.text.search(t, INSERT)
        startIndex = endIndex + ('-%dc' % len(tagWord))
        Ins = Ins + "," + startIndex + ":" + endIndex
        self.text.tag_add("insertions", startIndex, endIndex)
    # self.lblSub['text'] = sub
    # self.lblIns['text'] = Ins
    # self.lblDel['text'] = Del

    self.text.tag_config("substitutes", background="yellow", foreground="blue")
    self.text.tag_config("deletions", background="red", foreground="green")
    self.text.tag_config("insertions", background="green", foreground="yellow")

    Acc = (1 - WER) * 100
    self.AccuracyEntry.insert(0, "%f percent" % Acc)

    Vokaturi.load(os.path.abspath('.') + "/OpenVokaturi-3-3/lib/OpenVokaturi-3-3-mac64.dylib")
    (sample_rate, samples) = scipy.io.wavfile.read('microphone-results.wav')
    buffer_length = len(samples)
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    voice.fill(buffer_length, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        dictemotionProbabilities = dict()
        dictemotionProbabilities['Neutral'] = emotionProbabilities.neutrality
        dictemotionProbabilities['Happy'] = emotionProbabilities.happiness
        dictemotionProbabilities['Sad'] = emotionProbabilities.sadness
        dictemotionProbabilities['Angry'] = emotionProbabilities.anger
        dictemotionProbabilities['Fear'] = emotionProbabilities.fear
        self.EmoEntry.focus()
        self.EmoEntry.delete(0, END)
        emo = max(dictemotionProbabilities, key=dictemotionProbabilities.get)
        self.EmoEntry.insert(0, emo)
    else:
        self.EmoEntry.focus()
        self.EmoEntry.delete(0, END)
        self.EmoEntry.insert(0, 'could not find emotion from the voice')
    voice.destroy()
def audio_thread():
    p = pyaudio.PyAudio()
    for ii in range(p.get_device_count()):
        print(str(ii) + " " + p.get_device_info_by_index(ii).get('name'))
    p.terminate()  # only needed for listing devices; a fresh instance is opened per recording

    form_1 = pyaudio.paInt16  # 16-bit resolution
    chans = 1  # 1 channel
    samp_rate = 44100  # 44.1kHz sampling rate
    chunk = 132096  # number of recorded samples for buffer
    record_secs = 3  # seconds to record
    dev_index = 2  # device index found by p.get_device_info_by_index(ii)
    i = 0
    while True:
        sleep(1)
        # create pyaudio stream
        i = i + 1
        wav_output_filename = '../sounds/test' + str(i) + '.wav'  # name of .wav file
        audio = pyaudio.PyAudio()
        stream = audio.open(format=form_1, rate=samp_rate, channels=chans,
                            input_device_index=dev_index, input=True,
                            frames_per_buffer=chunk)
        frames = []
        # loop through stream and append audio chunks to frame array
        for ii in range(0, int((samp_rate / chunk) * record_secs)):
            data = stream.read(chunk)
            frames.append(data)
        # stop the stream, close it, and terminate the pyaudio instantiation
        stream.stop_stream()
        stream.close()
        audio.terminate()
        # save the audio frames as .wav file
        wavefile = wave.open(wav_output_filename, 'wb')
        wavefile.setnchannels(chans)
        wavefile.setsampwidth(audio.get_sample_size(form_1))
        wavefile.setframerate(samp_rate)
        wavefile.writeframes(b''.join(frames))
        wavefile.close()

        (sample_rate, samples) = scipy.io.wavfile.read(wav_output_filename)
        buffer_length = len(samples)
        c_buffer = Vokaturi.SampleArrayC(buffer_length)
        if samples.ndim == 1:  # mono
            c_buffer[:] = samples[:] / 32768.0
        else:  # stereo
            c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0
        voice = Vokaturi.Voice(sample_rate, buffer_length)
        voice.fill(buffer_length, c_buffer)
        quality = Vokaturi.Quality()
        emotionProbabilities = Vokaturi.EmotionProbabilities()
        voice.extract(quality, emotionProbabilities)
        if quality.valid:
            print("[AUDIO] Neutral: %.3f" % emotionProbabilities.neutrality)
            print("[AUDIO] Happy: %.3f" % emotionProbabilities.happiness)
            print("[AUDIO] Sad: %.3f" % emotionProbabilities.sadness)
            print("[AUDIO] Angry: %.3f" % emotionProbabilities.anger)
            print("[AUDIO] Fear: %.3f" % emotionProbabilities.fear)
            # publish the fresh probabilities under the mutex
            audio_emotions_mutex.acquire()
            audio_emotions[_NEUTRAL] = emotionProbabilities.neutrality
            audio_emotions[_HAPPINESS] = emotionProbabilities.happiness
            audio_emotions[_SADNESS] = emotionProbabilities.sadness
            audio_emotions[_ANGER] = emotionProbabilities.anger
            audio_emotions[_FEAR] = emotionProbabilities.fear
            audio_emotions_mutex.release()
        else:
            print("[AUDIO] Not enough sonorancy to determine emotions")
        voice.destroy()
def voice_predict():
    file_name = "Audio/audio.wav"
    print("Loading library...")
    Vokaturi.load("OpenVokaturi-2-1d/lib/Vokaturi_mac64.so")
    print("Analyzed by: %s" % Vokaturi.versionAndLicense())
    print("Reading sound file...")
    (sample_rate, samples) = scipy.io.wavfile.read(file_name)
    print(" sample rate %.3f Hz" % sample_rate)
    print("Allocating Vokaturi sample array...")
    buffer_length = len(samples)
    print(" %d samples, %d channels" % (buffer_length, samples.ndim))
    c_buffer = Vokaturi.SampleArrayC(buffer_length)
    if samples.ndim == 1:
        c_buffer[:] = samples[:] / 32768.0  # mono
    else:
        c_buffer[:] = 0.5 * (samples[:, 0] + 0.0 + samples[:, 1]) / 32768.0  # stereo
    print("Creating VokaturiVoice...")
    voice = Vokaturi.Voice(sample_rate, buffer_length)
    print("Filling VokaturiVoice with samples...")
    voice.fill(buffer_length, c_buffer)
    print("Extracting emotions from VokaturiVoice...")
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)

    fh = open("output.txt", 'a')
    fh.write("Based on your voice, your emotion is ")
    if not quality.valid:
        fh.write("beyond my understanding.\n")
        exit(1)  # bail out entirely when the quality is too low
    # Round each probability to a whole percentage, drop the zeros, and sort
    # ascending so the strongest emotion is reported first below.
    print_result = [(round(emotionProbabilities.neutrality * 100), "neutral"),
                    (round(emotionProbabilities.happiness * 100), "happy"),
                    (round(emotionProbabilities.anger * 100), "angry"),
                    (round(emotionProbabilities.fear * 100), "fearful"),
                    (round(emotionProbabilities.sadness * 100), "sad")]
    print_result = [tup for tup in print_result if tup[0] != 0]
    print_result.sort(key=lambda tup: tup[0])
    if len(print_result) == 0:
        fh.write("beyond my understanding.\n")
    elif len(print_result) == 1:
        fh.write("dominantly %d percent %s.\n" % (print_result[0][0], print_result[0][1]))
    else:
        for i in range(len(print_result) - 1, 0, -1):
            fh.write("%d percent %s, " % (print_result[i][0], print_result[i][1]))
        fh.write("and %d percent %s.\n" % (print_result[0][0], print_result[0][1]))
    fh.close()

    # Copy the result into the templates file used by the dialogue engine.
    with open("output.txt") as f1:
        with open("templates.yaml", "w") as f2:
            f2.write("welcome: Ready to hear your comments?\n\nround: ")
            for line in f1:
                f2.write(line.strip("\n"))
                f2.write(" ")
    voice.destroy()
sys.path.append("../api")
import Vokaturi

print("Loading library...")
Vokaturi.load("../lib/open/macos/OpenVokaturi-3-3-mac64.dylib")
print("Analyzed by: %s" % Vokaturi.versionAndLicense())

print("Reading sound file...")
file_name = sys.argv[1]
(sample_rate, samples) = scipy.io.wavfile.read(file_name)
print(" sample rate %.3f Hz" % sample_rate)

print("Allocating Vokaturi sample array...")
buffer_length = len(samples)
print(" %d samples, %d channels" % (buffer_length, samples.ndim))
# The buffer holds one second of audio at a time for per-second analysis.
c_buffer = Vokaturi.SampleArrayC(sample_rate)

print("Creating VokaturiVoice...")
voice = Vokaturi.Voice(sample_rate, buffer_length)

numberOfSeconds = int(buffer_length / sample_rate)
print("Start(s) End(s) Neutral Happy Sad Angry Fear")
for isecond in range(0, numberOfSeconds):
    startSample = round(isecond * sample_rate)
    endSample = round((isecond + 1) * sample_rate)
    print(startSample, endSample)
    if samples.ndim == 1:  # mono
        c_buffer[:] = samples[startSample:endSample] / 32768.0
    else:  # stereo
        c_buffer[:] = 0.5 * (samples[startSample:endSample, 0] + 0.0
                             + samples[startSample:endSample, 1]) / 32768.0
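    # The source is cut off here. Based on the extract/print pattern every
    # other snippet in this section uses, the loop presumably continues
    # roughly as follows (a sketch, not the original code):
    voice.fill(sample_rate, c_buffer)
    quality = Vokaturi.Quality()
    emotionProbabilities = Vokaturi.EmotionProbabilities()
    voice.extract(quality, emotionProbabilities)
    if quality.valid:
        print("%7.3f %7.3f %7.3f %7.3f %7.3f" % (
            emotionProbabilities.neutrality,
            emotionProbabilities.happiness,
            emotionProbabilities.sadness,
            emotionProbabilities.anger,
            emotionProbabilities.fear))

voice.destroy()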