def set_name(device_id, voice_path, new_name):
    """Associate the voice sample at ``voice_path`` with speaker ``new_name``.

    Spaces are stripped from ``new_name``; the result must be alphanumeric.
    When the name is already listed in the voice database, the sample is
    processed as a single-speaker file, its ``S0`` cluster is assigned to
    that speaker and the database is updated.  Otherwise the sample is
    converted with ``fm.file2wav``, added to the database as a new model and
    the intermediate files produced next to the wav are deleted.

    Args:
        device_id: Not used by this function.
        voice_path: Path of the recorded voice sample.
        new_name: Speaker identifier to attach to the sample.

    Returns:
        None.  Problems are reported on stdout.
    """
    print("set " + voice_path + " to: " + new_name)
    new_name = new_name.replace(' ', '')
    if not new_name.isalnum():
        print('error: SPEAKER_ID must be alphanumeric')
        return

    # Query the database once; the generator still stops at the first
    # gender bucket that contains the name.
    known = db.get_speakers()
    if any(new_name in known[gender] for gender in ('U', 'M', 'F')):
        voice = Voiceid(db, voice_path, single=True)
        voice.extract_speakers(quiet=True, thrd_n=3)
        cluster = voice.get_cluster('S0')
        cluster.set_speaker(new_name)
        voice.update_db()
        return

    try:
        # assume only one speaker in one sample
        wav_path = fm.file2wav(voice_path)
        file_basename = os.path.splitext(wav_path)[0]
        db.add_model(file_basename, new_name)

        # Best-effort cleanup: one missing file must not stop the others
        # from being removed.
        cleanup_failed = False
        for suffix in (".seg", ".ident.seg", ".init.gmm"):
            try:
                os.remove(file_basename + suffix)
            except OSError:
                cleanup_failed = True
        if cleanup_failed:
            print("WARNING: error deleting some intermediate files")
    except IOError:
        print("voice file doesn't exist")
    except OSError:
        print("WARNING: error deleting some intermediate files")
    except TypeError:
        print("Type error")
def segment_input(wavfile, dbpath='./voicedb'):
    """Extract the speakers of ``wavfile`` and print each cluster.

    For every cluster reported by ``Voiceid`` the cluster itself is printed,
    followed by its segments (via ``print_segments``) and a blank line.

    Args:
        wavfile: Path of the audio file to analyse.
        dbpath: Directory passed to ``GMMVoiceDB``.

    Returns:
        None.
    """
    db = GMMVoiceDB(dbpath)
    v = Voiceid(db, wavfile)
    v.extract_speakers()
    for c in v.get_clusters():
        cluster = v.get_cluster(c)
        print(cluster)
        cluster.print_segments()
        print("")
def voicerec(fname):
    """Extract the speakers of ``fname`` and write a report into ``out``.

    The report holds one ``Speaker : <name>`` entry per cluster, each followed
    by the stringified result of ``print_segments()``.  The whole text is
    inserted into the ``out`` widget in a single call.

    Args:
        fname: Path of the audio file to analyse.
    """
    recognizer = Voiceid(GMMVoiceDB('models'), fname)
    recognizer.extract_speakers()

    pieces = []
    for label in recognizer.get_clusters():
        found = recognizer.get_cluster(label)
        # The speaker name is the second space-separated token of the
        # cluster's string form, with the surrounding parentheses removed.
        speaker = str(found).split(' ')[1].strip('(').strip(')')
        pieces.append('Speaker : ' + speaker)
        # NOTE(review): if print_segments() only prints and returns None,
        # the literal text "None" ends up in the report -- confirm.
        pieces.append('\n' + str(found.print_segments()))

    out.insertPlainText(''.join(pieces) + "\n")
def voicerec(fname):
    """Build a per-speaker report for ``fname`` and show it in ``out``.

    Speaker extraction is run against the ``models`` voice database.  For
    each resulting cluster a ``Speaker : <name>`` entry is appended to the
    report, followed by a newline and the stringified return value of the
    cluster's ``print_segments()``.

    Args:
        fname: Path of the audio file to analyse.
    """
    voice_db = GMMVoiceDB('models')
    analysis = Voiceid(voice_db, fname)
    analysis.extract_speakers()

    report = ''
    for key in analysis.get_clusters():
        entry = analysis.get_cluster(key)
        # Second token of the cluster's text form, parentheses stripped.
        token = str(entry).split(' ')[1]
        name = token.strip('(').strip(')')
        # NOTE(review): print_segments() is stringified as-is; if it returns
        # None the report contains "None" -- confirm this is intended.
        segments_text = str(entry.print_segments())
        report += 'Speaker : ' + name + '\n' + segments_text

    out.insertPlainText(report + "\n")
def recognize(device_id, voice_path):
    """Identify the speaker of ``voice_path`` and publish the result.

    The sample is processed as a single-speaker file.  The top entry of the
    ``S0`` cluster's best-five list is accepted only when its score is above
    the threshold; otherwise the speaker is reported as ``"unknown"``.  The
    result is POSTed as JSON to the message endpoint and published on
    ``ais/recognize/result/<device_id>/<voice_path>``.  Intermediate files
    created next to the sample are then removed.

    Args:
        device_id: Identifier used in the publish topic.
        voice_path: Path of the recorded voice sample.

    Returns:
        None.
    """
    # voice_db_lock.acquire()
    print(db.get_speakers())

    # Scores at or below this value are not trusted as a match.
    min_score = -33.0

    try:
        # single=True: assume one speaker per sample and skip diarization.
        # To Do: multiple speakers in one sample
        voice = Voiceid(db, voice_path, single=True)
        voice.extract_speakers(quiet=True, thrd_n=3)
        cluster = voice.get_cluster('S0')

        speaker = "unknown"
        candidates = cluster.get_best_five()
        if candidates:
            best_name, best_score = candidates[0][0], candidates[0][1]
            if best_score > min_score:
                speaker = best_name
        print(speaker)

        payload = {
            'audio': 'http://52.24.205.33/voice/' + voice_path,
            'userName': speaker,
            'time': '1'
        }
        # NOTE(review): an exception raised by the two calls below skips the
        # cleanup that follows.
        requests.post('http://129.236.234.21:8080/message',
                      data=json.dumps(payload), headers=HEADERS)
        client.publish("ais/recognize/result/" + device_id + "/" + voice_path,
                       speaker)

        # Best-effort cleanup: a missing file must neither stop the
        # remaining removals nor escape as an exception.
        basename = voice.get_file_basename()
        leftovers = [basename + '.seg', basename + '.g.seg', basename + '.s.seg']
        wav_copy = basename + '.wav'
        if voice.get_filename() != wav_copy:
            # Only delete the wav when it is not the input file itself.
            leftovers.append(wav_copy)

        cleanup_failed = False
        for path in leftovers:
            try:
                os.remove(path)
            except OSError:
                cleanup_failed = True
        try:
            shutil.rmtree(basename)
        except OSError:
            cleanup_failed = True
        if cleanup_failed:
            print("WARNING: error deleting some intermediate files")
    except IOError:
        print("voice file doesn't exist")
def GetSpeakers(voicedb, audiofile):
    """Run speaker extraction on ``audiofile`` and return its last cluster.

    Each step (constructing ``Voiceid``, extracting speakers, reading the
    clusters) is guarded separately so the log names the step that failed.

    Args:
        voicedb: Voice database handed to ``Voiceid``; ``set_maxthreads(5)``
            is called on it after the clusters are read.
        audiofile: Path of the audio file to analyse.

    Returns:
        The cluster fetched for the last name yielded by ``get_clusters()``,
        or ``False`` when a step fails or no cluster is available.
    """
    from voiceid.sr import Voiceid
    # Not referenced below; kept in case the import is relied on -- TODO
    # confirm and remove.
    from voiceid.db import GMMVoiceDB
    import logger

    # ``except Exception`` instead of a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        v = Voiceid(voicedb, audiofile)
    except Exception:
        logger.write("e", "File: voicehandler.py | Function: GetSpeakers | Error: Could not call Voiceid constructor!")
        return False

    try:
        v.extract_speakers()
    except Exception:
        logger.write("e", "File: voicehandler.py | Function: GetSpeakers | Error: Could not extract speakers from file!")
        return False

    cluster = None
    try:
        for c in v.get_clusters():
            cluster = v.get_cluster(c)
        voicedb.set_maxthreads(5)
    except Exception:
        logger.write("e", "File: voicehandler.py | Function: GetSpeakers | Error: Could not get clusters!")
        return False

    if cluster is None:
        # No cluster was produced: report it through the same failure path
        # instead of hitting an unbound local at the return statement.
        logger.write("e", "File: voicehandler.py | Function: GetSpeakers | Error: Could not get clusters!")
        return False
    return cluster
def recognition():
    """Compare the recorded audio against every speaker in the database.

    Two results are printed:

    * "Best Speaker (Own)": every enrolled speaker is scored with
      ``db.match_voice``; the top one is printed when its lead over the
      runner-up, plus ``CEIL``, exceeds ``EPS``.
    * "Best Speaker": the best speaker of each cluster found by
      ``Voiceid.extract_speakers``.

    Reads the module-level ``audio`` (path without the ``.wav`` extension)
    and ``db``.  Prints a reminder and returns when no audio was recorded.
    """
    if audio is None:
        print('Record audio first!')
        return

    print('Comparing...')
    wav_path = audio + '.wav'
    v = Voiceid(db, wav_path)
    v.diarization()

    # (score, user) pairs; a list keeps users whose scores are identical,
    # which a dict keyed by score would silently drop.
    scored = []
    speakers_by_gender = db.get_speakers()
    for gender in speakers_by_gender:
        print('Gender:  %s' % (gender,))
        for user in speakers_by_gender[gender]:
            # Score each user once; the result is both shown and stored.
            match = db.match_voice(wav_path, user, gender)
            print(match)
            scored.append((match[user], user))

    print('--------------')
    print("Best Speaker (Own)")
    scored.sort(key=lambda pair: pair[0], reverse=True)
    scores = [pair[0] for pair in scored]
    if len(scored) >= 2:
        margin = scores[0] - scores[1] + CEIL
        print('%s %s %s' % (margin, EPS, scores))
        if margin > EPS:
            print(scored[0][1])
    else:
        # The decision rule needs a runner-up to compare against.
        print('Need at least two enrolled speakers to compare: %s' % (scores,))

    print('--------------')
    print("Best Speaker")
    print("Audio:  %s" % (audio.split('/')[-1],))
    v.extract_speakers(False, True)
    for c in v.get_clusters():
        cluster = v.get_cluster(c)
        print(cluster.get_best_speaker())
    print('------------')
    print("Done!")
# Sample files tried previously (kept for reference):
#   ../Test_set_all/Pasolini1971.mp3
#   /home/felix/Desktop/u/facciamoiconti.mp3
#   /home/felix/Desktop/u/facciamoiconti/S32.wav
#   /Users/labcontenuti/Desktop/voiceid/adaltavoce/voci/FabrizioGifuni-promessisposi-p1.wav
#   /Users/labcontenuti/Desktop/voiceid/adaltavoce/test/Piero_Baldini##mobyDick01_1sec.wav
#   /Users/labcontenuti/Desktop/voiceid/adaltavoce/test/Piero_Baldini##mobyDick01_3sec.wav
#   /Users/labcontenuti/Desktop/voiceid/DataSetVideolinaVoci/Giacomo_Mameli/2/Giacomo_Mameli##1-1.wav
v = Voiceid(
    db,
    '/Users/labcontenuti/Documents/workspace/activevoice/audio_test/2sec.wav',
)
v.extract_speakers()

# For every cluster: show its wave and name, then the start/stop time of
# each segment.  Segment boundaries are divided by 100 before formatting.
for c in v.get_clusters():
    cluster = v.get_cluster(c)
    # Other accessors explored here earlier: get_best_speaker(),
    # get_duration(), to_dict(), get_segments(), print_segments().
    print(" ".join(["cluster.wave ", str(cluster.wave)]))
    print(" ".join(["cluster.get_name ", str(cluster.get_name())]))
    print("\n\n\n\n\n\n")
    list_seg = cluster.get_segments()
    for seg in list_seg:
        begin = humanize_time(float(seg.get_start()) / 100)
        finish = humanize_time(float(seg.get_end()) / 100)
        print("start %s stop %s" % (begin, finish))
from voiceid.sr import Voiceid
from voiceid.db import GMMVoiceDB
import sys


def _print_models(voice_db):
    """Print the speakers currently stored in ``voice_db``."""
    print("DB Models:")
    print(voice_db.get_speakers())


# Open the voice database and show its contents before the update.
db = GMMVoiceDB('voiceDB')
_print_models(db)

# Analyse the audio file given as the first command-line argument.
v = Voiceid(db, sys.argv[1])
v.extract_speakers(False, True)

# Label clusters S1 and S6 as the same speaker.
for cluster_id in ('S1', 'S6'):
    c = v.get_cluster(cluster_id)
    c.set_speaker('derek')

# Store the new labels and show the database again.
v.update_db()
_print_models(db)
def test_dir():
    """Run speaker identification on every labelled wav in ``DIR_PATH``.

    Files are selected when they match ``*.wav`` and contain ``##`` in their
    name, then filtered through ``verify_goodness``.  For each file every
    cluster is printed and logged (segments, duration, best speaker, elapsed
    time, top entry of the best-five list, distance), and the file's summary
    line is appended to one of the module-level result lists:

    * ``find_ok`` / ``find_errata``: a speaker was identified and
      ``verify_speaker`` accepted / rejected it;
    * ``not_find``: the best speaker was ``"unknown"``; additionally
      ``unknown_ok`` / ``unknown_errata`` record whether the top best-five
      candidate would have been right.

    Totals are printed and written to ``result.log`` inside ``DIR_PATH``.
    """
    def show(*items):
        # Space-separated output, one str() per item.
        print(" ".join(str(item) for item in items))

    all_wav = [
        f for f in os.listdir(DIR_PATH)
        if fnmatch.fnmatch(f, "*.wav") and f.find("##") > -1
    ]
    all_wav = verify_goodness(all_wav)
    print(all_wav)
    print("verifica ok... procediamo")

    # os.path.join works whether or not DIR_PATH ends with a separator.
    logging.basicConfig(filename=os.path.join(DIR_PATH, "result.log"),
                        level=logging.INFO)
    db = GMMVoiceDB("/Users/labcontenuti/.voiceid/gmm_db")

    for wav in all_wav:
        print("--------------------" + wav + "------------------------")
        v = Voiceid(db, os.path.join(DIR_PATH, wav))
        v.extract_speakers()
        logging.info("\n\n\n---------------------" + wav + "------------------")
        wav_tostring = str(wav) + "; "

        for c in v.get_clusters():
            cluster = v.get_cluster(c)
            for seg in cluster.get_segments():
                show("\n seg: ", seg)
                print(seg.get_start())
                print(seg.get_end())
                print(humanize_time(seg.get_duration()))

            # Query each value once and reuse it for console, log and
            # summary line.
            duration = cluster.get_duration()
            best_speaker = cluster.get_best_speaker()
            best_is_correct = verify_speaker(wav, best_speaker)
            elapsed = v._get_time()
            elapsed_text = humanize_time(elapsed)
            best_five = cluster.get_best_five()
            # The best-five list may be empty; avoid indexing it blindly.
            top_candidate = best_five[0] if best_five else None
            distance = cluster.get_distance()

            show("cluster.get_duration ", duration)
            logging.info("duration: " + str(duration))
            wav_tostring += ": " + "duration:" + str(duration)

            show("cluster.get_name ", cluster.get_name())
            show("cluster.get_best_speaker ", best_speaker,
                 " correct?", best_is_correct)
            logging.info("best_speaker " + str(best_speaker))
            wav_tostring += "; " + "best_speaker:" + str(best_speaker)

            show("v._get_time() ", elapsed, " ", elapsed_text)
            logging.info("get_time: " + str(elapsed_text))
            wav_tostring += "; " + "get_time:" + str(elapsed_text)

            show("cluster.get_best_five ", best_five, top_candidate)
            logging.info("best of five: " + str(top_candidate))
            wav_tostring += "; " + "best of five:" + str(top_candidate)

            show("cluster.get_distance() ", str(distance))
            logging.info("distance best-closest " + str(distance))
            wav_tostring += "; " + "distance best-closest:" + str(distance)
            print("")

            if best_speaker != "unknown":
                if best_is_correct:
                    find_ok.append(wav_tostring)
                else:
                    find_errata.append(wav_tostring)
            else:
                not_find.append(wav_tostring)
                if top_candidate is None:
                    # No candidate to check: counted as unknown only.
                    pass
                elif verify_speaker(wav, top_candidate[0]):
                    unknown_ok.append(wav_tostring)
                else:
                    unknown_errata.append(wav_tostring)

    total = len(find_ok) + len(find_errata) + len(not_find)
    print("Number TEST--" + str(total))
    print("OK--------- " + str(len(find_ok)))
    print("ERRATA-------" + str(len(find_errata)))
    print("UNKNOWN------" + str(len(not_find)))
    print("UNKNOWN ERR--" + str(len(unknown_errata)))
    print("UNKNOWN OK--" + str(len(unknown_ok)))

    logging.info("\n\n")
    logging.info("Number TEST " + str(total))
    logging.info("ok " + str(len(find_ok)))
    logging.info("ERRATA" + str(len(find_errata)))
    logging.info("UNKNOWN" + str(len(not_find)))
    logging.info("UNKNOWN ERR " + str(len(unknown_errata)))
    logging.info("UNKNOWN OK " + str(len(unknown_ok)))

    logging.info("\n\n\n----UNKNOWN OK ---")
    for f in unknown_ok:
        logging.info(f)
    logging.info("\n\n\n----UNKNOWN ERRATA ---")
    for f in unknown_errata:
        logging.info(f)
    logging.info("\n\n\n----ERRATA---")
    for f in find_errata:
        logging.info(f)
# Using voiceid (Google open-source project) for voice recognition with Python
from voiceid.sr import Voiceid
from voiceid.db import GMMVoiceDB

db = GMMVoiceDB('mydir')  # database/directory of registered voices

# Raw strings: in a normal literal, sequences such as "\r" and "\a" inside
# these Windows paths are read as escape characters and corrupt the path.
db.add_model(r'C:\Users\raman\Documents\raman.wma', 'Raman', 'M')
db.add_model(r'C:\Users\raman\Documents\lalit.wma', 'Lalit', 'M')
db.add_model(r'C:\Users\raman\Documents\aneesha.wma', 'Aneesha', 'F')
db.add_model(r'C:\Users\raman\Documents\priyanka.wma', 'Priyanka', 'F')

db.get_speakers()  # this will get all the speakers in the database/directory

lalit_sample = r'C:\Users\raman\Documents\lalit.wma'

# match_voice compares the sample with the named speaker's model.
# NOTE(review): the result is tested for truthiness only; if match_voice
# returns a score mapping, compare the score with a threshold -- confirm.
if db.match_voice(lalit_sample, 'Lalit', 'M'):
    print("Lalit is present")
else:
    print("Lalit is absent")

# Voiceid needs the audio file to analyse; the sample checked above is used
# here -- TODO confirm the intended file.
v = Voiceid(db, lalit_sample)
v.extract_speakers()
for c in v.get_clusters():
    cluster = v.get_cluster(c)
    print(cluster)
    cluster.print_segments()