def test_speaker_realtime(source):
    """Identify the speaker of *source* (a WAV path) against pre-trained GMMs.

    Loads every .gmm model from the fixed model directory, scores the
    audio's features under each model, and stores the winning speaker
    name in ``globals.speaker_realtime``.
    """
    modelpath = "C:\\Users\\USER\\GUI_SpeakerID\\speaker_models\\"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # Load the Gaussian speaker models; close each file handle promptly
    # (the original leaked every handle opened inside the comprehension).
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(pickle.load(fh))
    speakers = [fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files]
    # Read the test audio and extract its MFCC feature vectors.
    rate, audio = read(source)
    vector = extract_features(audio, rate)
    log_likelihood = np.zeros(len(models))
    for i, gmm in enumerate(models):  # checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    print("\tdetected as -", speakers[winner])
    time.sleep(1.0)
    globals.speaker_realtime = speakers[winner]
def train(): warnings.filterwarnings("ignore") # path to training data source = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\18apr2018\\Training\\" modelpath = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\18apr2018\\TestSource\\" dest_dir = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\18apr2018\\Tilak\\wav\\" test_file = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\18apr2018\\test.txt" prefix = "Tilak\\wav\\" file_paths = open(test_file, 'r') record_audio("testVoice", dest_dir) enroll_edit("testVoice", test_file, prefix) gmm_files = [ os.path.join(source, fname) for fname in os.listdir(source) if fname.endswith('.gmm') ] # load model models = [cpk.load(open(fname, 'rb')) for fname in gmm_files] speakers = [fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files] sr, audio = read(dest_dir + "testVoice.wav") vector = extract_features(audio, sr)
def gmm_graph(dir_path, file_name):
    """Fit a 1-D, 16-component GMM to the features of <dir_path><file_name>.wav
    and plot the mixture density over a normalized histogram of the data."""
    sr, audio = read(dir_path + file_name + ".wav")
    vector = extract_features(audio, sr)
    features = np.array(vector).reshape(-1, 1)  # flatten to one feature column
    model = GaussianMixture(n_components=16,
                            covariance_type='full',
                            n_init=3,
                            init_params='random',
                            random_state=0,
                            tol=1e-9,
                            max_iter=200)
    model.fit(features)
    # Evaluate the fitted density on an even grid spanning the data range.
    gmm_x = np.linspace(np.min(features), np.max(features),
                        len(features)).reshape(-1, 1)
    gmm_y = np.exp(model.score_samples(gmm_x))  # score_samples is log-density
    fig, ax = plt.subplots()
    # `density=True` replaces the `normed` kwarg removed in matplotlib >= 3.1.
    ax.hist(features, bins=50, density=True, alpha=0.5, color="#0070FF")
    ax.plot(gmm_x, gmm_y, color="black", lw=1, label="Gaussian Mixture 1D Plot")
    ax.set_ylabel("Probability density")
    ax.set_xlabel("Arbitrary units")
    plt.show()
def activity_detect():
    """Classify the activity in record.wav against the GMMs in activity_models/.

    Stores the winning model's name in the module-level ``activities``.
    """
    print("activity detecting.....")
    global activities
    # path to trained activity models
    modelpath = "activity_models/"
    path = "record.wav"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.pickle')
    ]
    # Load the Gaussian models; pickle data is binary, so the files must
    # be opened 'rb' (the original used text mode 'r').
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(cPickle.load(fh))
    speakers = [
        fname.split("/")[-1].split(".pickle")[0] for fname in gmm_files
    ]
    # Read the recorded audio and extract its features.
    path = path.strip()
    sr, audio = read(path)
    vector = extract_features(audio, sr)
    log_likelihood = np.zeros(len(models))
    for i, gmm in enumerate(models):  # checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    activities = speakers[winner]
def train(self, samples=3):
    """Train one GMM per speaker from the .wav files under self.audio_path.

    Files are expected to be named "<user>_*.wav"; every ``samples``
    consecutive files are treated as one speaker, their MFCC & delta-MFCC
    features are stacked, and the fitted 16-component diagonal GMM is
    pickled to "<user>.gmm" alongside the audio.
    """
    audio_paths = glob.glob(self.audio_path + '/*.wav')
    count = 1
    # BUG FIX: initialize the accumulator once, outside the loop -- the
    # original reset `features` on every file, so nothing ever stacked.
    features = np.asarray(())
    for path in audio_paths:
        user = path.split('\\')[-1].split('_')[0]  # speaker id from filename
        print(user)
        print(path)
        sr, audio = read(path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        if count == samples:
            gmm = sklearn.mixture.GaussianMixture(n_components=16,
                                                  max_iter=200,
                                                  covariance_type='diag',
                                                  n_init=3)
            gmm.fit(features)
            picklefile = os.path.join(self.audio_path,
                                      user.split('.')[0] + ".gmm")
            # Dump the trained model; the context manager closes the file
            # deterministically (the original leaked the handle).
            with open(picklefile, 'wb') as fh:
                pickle.dump(gmm, fh)
            print('modeling completed for speaker:', picklefile,
                  " with data point = ", features.shape)
            features = np.asarray(())
            count = 0
        count = count + 1
def predictSpeaker(self):
    """Score every test file listed in `test_file` against all enrolled
    GMMs and print the best-matching speaker for each.

    NOTE(review): relies on module-level `test_file`, `modelpath` and
    `source` -- confirm they are defined in the enclosing module.
    """
    file_paths = open(test_file, 'r')
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # BUG FIX: the original referenced `fname` before any loop defined it
    # (NameError on Python 3) and rebuilt `models`/`speakers` inside a
    # redundant `for fname in gmm_files:` loop. Load everything once.
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:  # pickles must be read in binary mode
            models.append(cPickle.load(fh))
    speakers = [
        fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files
    ]
    # Score each listed test audio file against every model.
    for path in file_paths:
        path = path.strip()
        print(path)
        sr, audio = read(source + path)
        vector = extract_features(audio, sr)
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):  # checking with each model one by one
            scores = np.array(gmm.score(vector))
            log_likelihood[i] = scores.sum()
            print(log_likelihood[i])
        winner = np.argmax(log_likelihood)
        print("\tdetected as - ", speakers[winner])
def gmm_cluster_graph(dir_path, file_name):
    """Fit a 16-component diagonal GMM to the features of
    <dir_path><file_name>.wav and scatter-plot the first two feature
    dimensions colored by predicted mixture component."""
    sample_rate, signal = read(dir_path + file_name + ".wav")
    feats = extract_features(signal, sample_rate)
    mixture = GaussianMixture(n_components=16,
                              covariance_type='diag',
                              n_init=3,
                              init_params='random',
                              random_state=0,
                              tol=1e-9,
                              max_iter=200)
    mixture.fit(feats)
    # Hard-assign every feature frame to its most likely component.
    component_ids = mixture.predict(feats)
    plt.scatter(feats[:, 0],
                feats[:, 1],
                s=10,
                linewidth=1,
                cmap=plt.cm.get_cmap('tab20', 16),
                c=component_ids)
    plt.title("Gaussian Mixture Model")
    plt.show()
def count():
    """Count distinct speakers across the segmented chunk WAVs and record
    each speaker's speaking time (3 s per chunk) in the module-level
    ``speaker_name_len`` dict and ``speaker_num`` list."""
    print("speaker counting..........")
    global speaker_name_len
    global speaker_num
    speaker_name_total = []
    file_paths = []
    num = filter_segment()
    # BUG FIX: compare integer values with ==, not `is` (identity of small
    # ints is a CPython implementation detail).
    if num == 0:
        peoplenum = 0
    elif num == 1:
        peoplenum = 1
    else:
        for i in range(num - 1):  # drop the last (partial) segment
            chunk_name = "chunk_wavs/chunk{0}.wav".format(i)
            file_paths.append(chunk_name)
    # path to speaker training data
    modelpath = "speaker_models/"
    # speaker gmm model files
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.pickle')
    ]
    # Load each model in binary mode (pickle data is not text).
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(cPickle.load(fh))
    # speaker labels derived from the model file names
    speakers = [
        fname.split("/")[-1].split(".pickle")[0] for fname in gmm_files
    ]
    # Identify the speaker of every chunk (no-op when file_paths is empty).
    for path in file_paths:
        path = path.strip()
        sr, audio = read(path)
        vector = extract_features(audio, sr)
        log_likelihood = np.zeros(len(models))
        for i in range(len(models)):
            gmm = models[i]  # checking with each model one by one
            scores = np.array(gmm.score(vector))
            log_likelihood[i] = scores.sum()
        winner = np.argmax(log_likelihood)
        print("detected as - ", speakers[winner])
        speaker_name_total.append(speakers[winner])
    # De-duplicate speakers and accumulate per-speaker durations.
    speaker_name = list(set(speaker_name_total))
    for item in speaker_name:
        speaker_len = speaker_name_total.count(item) * 3  # 3 s per chunk
        speaker_num.append(speaker_len)
        speaker_name_len.update({item: speaker_len})
    print("speaker count done")
def chooseAction():
    """Return the name of the enrolled speaker whose GMM best explains
    the audio in output.wav."""
    sample_rate, signal = read('output.wav')
    feats = extract_features(signal, sample_rate)
    log_likelihood = np.zeros(len(models))
    for idx, gmm in enumerate(models):
        # total log-likelihood of the features under this speaker's model
        log_likelihood[idx] = np.array(gmm.score(feats)).sum()
    return speakers[np.argmax(log_likelihood)]
def createModel(self, destination):
    """Train a GMM for one speaker from every .wav in Trainer.source,
    pickle it under `destination`, log the model path, and remove the
    consumed wav files.

    The model file is named after the second-to-last component of
    `destination` (the speaker directory name).
    """
    dest = destination
    file_name = dest.split('/')[-2]  # speaker name from the path
    wav_files = [
        f for f in os.listdir(Trainer.source) if f.endswith('.wav')
    ]
    wavFileCount = len(wav_files)
    count = 1
    # Extract and stack the features of every file for this speaker.
    features = np.asarray(())
    for path in wav_files:
        sr, audio = read(Trainer.source + path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # Once every file's features are concatenated, train the model.
        if count == wavFileCount:
            gmm = GMM(n_components=16,
                      n_iter=200,
                      covariance_type='diag',
                      n_init=3)
            gmm.fit(features)
            picklefile = file_name + ".gmm"
            # BUG FIX: pickles must be written in binary mode ('wb', not
            # 'w'); also close the handle deterministically.
            with open(dest + picklefile, 'wb') as fh:
                cPickle.dump(gmm, fh)
            dest = '/'.join((dest.split('/'))[-3:-1])
            with open(r"C:\VOrder\sys\speaker_model\training_models.txt",
                      "a") as myfile:
                myfile.write(dest + '/' + picklefile + '\n')
            # The training wavs are consumed; remove them.
            for file in wav_files:
                os.remove(Trainer.source + file)
            print('modeling completed for speaker:', file_name)
            speech.say('modeling completed for speaker:' + file_name)
            features = np.asarray(())
            count = 0
        count = count + 1
def predict(self, test_path):
    """Score the audio at `test_path` against every .gmm model stored in
    self.audio_path and return the name of the best-scoring speaker."""
    sr, audio = read(test_path)
    vector = extract_features(audio, sr)
    gmms = glob.glob(os.path.join(self.audio_path, '*.gmm'))
    scores = []
    for model_path in gmms:
        # Load each model with a context manager so the handle is closed
        # (the original leaked one open file per model).
        with open(model_path, 'rb') as fh:
            user = pickle.load(fh)
        scores.append(
            (model_path.split('\\')[-1].split('.')[0], user.score(vector)))
    print(scores)
    # Highest average log-likelihood wins.
    return sorted(scores, key=lambda x: x[1], reverse=True)[0][0]
def load_test_audios(test_audio_dir: str):
    """
    Extracts the features from the test files

    Args:
        test_audio_dir: Path to the directory in which the test audios are stored

    Returns
        vectors (list): List of features vectors, one for each audio
    """
    vectors = []
    for audio_file in os.listdir(os.path.join(test_audio_dir)):
        sample_rate, signal = read(os.path.join(test_audio_dir, audio_file))
        feats = extract_features(signal, sample_rate)
        # key each vector by the file's base name (extension stripped)
        vectors.append((audio_file.split('.')[0], feats))
    return vectors
def validate(user_name):
    """Record a fresh sample for `user_name`, score it against the user's
    enrolled GMM, and print whether the voice matches.

    NOTE(review): relies on a module-level `logavg` computed at enroll
    time; the 50% score-ratio threshold is a heuristic -- confirm against
    the enrollment code.
    """
    source = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Test\\"
    os.makedirs(source + user_name + "\\wav\\")
    # Create (truncate) the per-user test list, then close the handle --
    # the original left it dangling.
    f = open(source + user_name + "\\test.txt", 'w')
    f.close()
    test_file = source + user_name + "\\test.txt"
    dir_test = source + user_name + "\\wav\\"
    gmm_dir = ("C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\"
               + user_name + "\\")
    prefix_test = user_name + "\\wav\\"
    # Record the validation sample and register it in the test list.
    f_name1 = user_name + "test"
    record_audio(f_name1, dir_test)
    edit_txt(f_name1, test_file, prefix_test)
    gmm_files = gmm_dir + user_name + ".gmm"
    # Load this user's Gaussian model.
    with open(gmm_files, 'rb') as fh:
        models = cpk.load(fh)
    # Score every listed recording against the enrolled model.
    with open(test_file, 'r') as file_paths1:
        for path1 in file_paths1:
            print(path1[:-1])
            path1 = path1[:-1]  # strip the trailing newline
            (sr, audio) = read(source + path1)
            vector = extract_features(audio, sr)
            log_likelihood = np.zeros(1)
            gmm = models  # single enrolled model for this user
            scores = np.array(gmm.score(vector))
            log_likelihood[0] = scores.sum()
            print(log_likelihood[0])
            # Accept when this sample scores close enough to the enrolled
            # average log-likelihood.
            if ((logavg / log_likelihood[0]) * 100) > 50:
                print("legit user")
            else:
                print("Invalid user")
def count():
    """Count distinct speakers across the segmented chunk WAVs and record
    each speaker's speaking time (3 s per chunk) in the module-level
    ``speaker_num`` list."""
    global speaker_name_len
    global speaker_num
    speaker_name_total = []
    file_paths = []
    num = filter_segment()
    # BUG FIX: compare integer values with ==, not `is`.
    if num == 0:
        peoplenum = 0
    elif num == 1:
        peoplenum = 1
    else:
        for i in range(num - 1):  # drop the last (partial) segment
            chunk_name = "chunk_wavs/chunk{0}.wav".format(i)
            file_paths.append(chunk_name)
    modelpath = "speaker_models/"  # path to the speaker models
    # the trained gmm files
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.pickle')
    ]
    # Load each gmm in binary mode (pickle data is not text).
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(cPickle.load(fh))
    # speaker labels from the file names
    speakers = [
        fname.split("/")[-1].split(".pickle")[0] for fname in gmm_files
    ]
    # Speaker count / speaking-time computation.
    speaker_num = []
    for path in file_paths:
        path = path.strip()
        sr, audio = read(path)
        vector = extract_features(audio, sr)
        log_likelihood = np.zeros(len(models))
        for i in range(len(models)):
            gmm = models[i]  # checking with each model one by one
            scores = np.array(gmm.score(vector))
            log_likelihood[i] = scores.sum()
        winner = np.argmax(log_likelihood)
        print("detected as - ", speakers[winner])
        speaker_name_total.append(speakers[winner])
    # De-duplicate speakers and accumulate per-speaker durations.
    speaker_name = list(set(speaker_name_total))
    for item in speaker_name:
        speaker_len = speaker_name_total.count(item) * 3  # 3 s per chunk
        speaker_num.append(speaker_len)
def train_models():
    """Train one GMM per speaker from the enrollment list.

    Reads wav paths (5 consecutive files per speaker) from
    voice_list_enroll.txt, stacks their MFCC & delta-MFCC features, fits
    a 16-component diagonal GMM per speaker, and pickles each model as
    "<speaker>.gmm" in the models directory.
    """
    # path to training data
    source = "C:\\Users\\USER\\GUI_SpeakerID\\development_set\\"
    # path where trained speaker models will be saved
    dest = "C:\\Users\\USER\\GUI_SpeakerID\\speaker_models\\"
    train_file = "voice_list_enroll.txt"
    file_paths = open(train_file, 'rb')
    count = 1
    features = np.asarray(())
    for path in file_paths:
        path = path.strip()
        print(path)
        # read the audio (paths come in as bytes from the 'rb' handle)
        rate, audio = read(source + path.decode('utf-8'))
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, rate)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # After 5 files of a speaker are concatenated, train that model.
        if count == 5:
            gmm = GaussianMixture(n_components=16,
                                  max_iter=200,
                                  covariance_type='diag',
                                  n_init=3)
            gmm.fit(features)
            # model file is named after the speaker prefix of the wav name
            picklefile = path.decode().split("_")[0] + ".gmm"
            with open(dest + picklefile, 'wb+') as fh:
                pickle.dump(gmm, fh)
            print('+ modeling completed for speaker:', picklefile,
                  " with data point = ", features.shape)
            features = np.asarray(())
            count = 0
        # BUG FIX: increment unconditionally. The original incremented only
        # in an else-branch, so every batch after the first needed 6 files
        # instead of the intended 5.
        count = count + 1
def send_credential(self): print("in send credential..", self.ids["b2"].disabled) #self.ids.entry.text="Speak your Id no" self.ids["b1"].disabled = True getaudio.fun("audio.wav") #feature_extraction.fun("audio.wav","data") #idno1=getid.fun(feature_extraction.fun("audio.wav")) #got_idno=idno1 #print(got_idno) modelpath = "speaker_models/" gmm_files = [ os.path.join(modelpath, fname) for fname in os.listdir(modelpath) if fname.endswith('.gmm') ] models = [pickle.load(open(fname, 'rb')) for fname in gmm_files] speakers = [ fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files ] audio, sr = librosa.load("audio.wav", res_type='kaiser_fast') vector = extract_features(audio, sr) log_likelihood = np.zeros(len(models)) for i in range(len(models)): gmm = models[i] #checking with each model one by one scores = np.array(gmm.score(vector)) log_likelihood[i] = scores.sum() print("score : ", scores.sum()) winner = np.argmax(log_likelihood) print(winner) print("\tdetected as - ", speakers[winner]) if speakers[winner] == "speaker_models/sid": u = "http://localhost:5000/attendence?year=" + year + "&branch=" + branch + "&sem=" + sem + "&id_no=" + id_no r = requests.post(url=u) print(r.text) self.ids.entry.text = r.text self.ids.entry.text = "sid" else: self.ids.entry.text = "you are not id_no : " + id_no self.ids.entry.text = "Not sid" self.ids["b1"].disabled = False self.ids["b2"].disabled = False
def func(file):
    """Write `file` (a wav path relative to the development set) to the
    test list, score it against every enrolled GMM, and return the
    best-matching speaker name."""
    # paths to the audio files and the trained models
    source = cwd + "/development_set/"
    modelpath = cwd + "/speaker_models/"
    test_file = "4.txt"
    with open(test_file, "w") as f:
        f.write(file)
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # Load the Gaussian speaker models, closing each handle promptly.
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(pickle.load(fh))
    speakers = [fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files]
    # Score the (single) listed test file; return on the first entry,
    # matching the original control flow.
    with open(test_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            print(path)
            sr, audio = read(source + path)
            vector = extract_features(audio, sr)
            log_likelihood = np.zeros(len(models))
            for i, gmm in enumerate(models):  # check each model one by one
                scores = np.array(gmm.score(vector))
                log_likelihood[i] = scores.sum()
            winner = np.argmax(log_likelihood)
            print("\tdetected as - ", speakers[winner])
            time.sleep(1.0)
            return speakers[winner]
def predict_voice(path):
    """Return the name of the enrolled speaker whose GMM best matches the
    audio file at `path`."""
    modelpath = "new_speaker_model/"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # Load each model with a context manager so the handles are released
    # (the original leaked one open file per model).
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(cPickle.load(fh))
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]
    print(path)
    sr, audio = read(path)
    vector = extract_features(audio, sr)
    log_likelihood = np.zeros(len(models))
    for i, gmm in enumerate(models):  # checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    print("\tdetected as - ", speakers[winner])
    return speakers[winner]
def test_speaker():
    """Score every audio listed in voice_list_test.txt against the
    enrolled GMMs and store the winning speaker in ``globals.speaker``."""
    # paths to the audio files and the trained models
    source = "C:\\Users\\USER\\GUI_SpeakerID\\development_set\\"
    modelpath = "C:\\Users\\USER\\GUI_SpeakerID\\speaker_models\\"
    test_file = "voice_list_test.txt"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # Load the Gaussian speaker models, closing each handle promptly
    # (the original leaked every handle).
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as fh:
            models.append(pickle.load(fh))
    speakers = [fname.split("\\")[-1].split(".gmm")[0] for fname in gmm_files]
    # Score each listed test file.
    with open(test_file, 'rb') as file_paths:
        for path in file_paths:
            path = path.strip()
            print(path)
            # paths come in as bytes from the 'rb' handle
            rate, audio = read(source + path.decode('utf-8'))
            vector = extract_features(audio, rate)
            log_likelihood = np.zeros(len(models))
            for i, gmm in enumerate(models):  # check each model one by one
                scores = np.array(gmm.score(vector))
                log_likelihood[i] = scores.sum()
            winner = np.argmax(log_likelihood)
            print("\tdetected as -", speakers[winner])
            time.sleep(1.0)
            globals.speaker = speakers[winner]
def speakerName():
    """Identify the speaker of file.wav against the enrolled GMMs and
    return the winning speaker's name."""
    # locations of the training data and the trained models
    source = "development_set\\"
    modelpath = "speaker_models\\"
    test_file = "development_set_test.txt"
    file_paths = open(test_file, 'r')
    model_files = [
        os.path.join(modelpath, entry) for entry in os.listdir(modelpath)
        if entry.endswith('.gmm')
    ]
    # load the Gaussian speaker models
    models = [cPickle.load(open(entry, 'rb')) for entry in model_files]
    speakers = [
        entry.split("\\")[-1].split(".gmm")[0] for entry in model_files
    ]
    # Extract features of the recording under test.
    sample_rate, signal = read("file.wav")
    feats = extract_features(signal, sample_rate)
    log_likelihood = np.zeros(len(models))
    for idx, gmm in enumerate(models):
        # total log-likelihood of the features under this model
        log_likelihood[idx] = np.array(gmm.score(feats)).sum()
    winner = np.argmax(log_likelihood)
    print("\tDetected as - ", speakers[winner])
    return speakers[winner]
def validate(user_name):
    """Validate a speaker: prompt for a key-phrase, record it, and compare
    its GMM log-likelihood against the enrolled average.

    Accepts when this sample's score is within ~85% of the enrolled
    average log-likelihood; otherwise prints "Invalid user".
    """
    global log_likelihood
    log_likelihood = np.zeros(1)
    source_enroll = "..\\Enroll\\"
    source_test = "..\\Test\\"
    os.makedirs("..\\Test\\" + user_name + "\\wav\\")
    # Create (truncate) the per-user test list.
    f = open("..\\Test\\" + user_name + "\\test.txt", 'w')
    test_file = "..\\Test\\" + user_name + "\\test.txt"
    dir_test = "..\\Test\\" + user_name + "\\wav\\"
    gmm_dir = "..\\Enroll\\" + user_name + "\\"
    enroll_file = "..\\Enroll\\" + user_name + "\\enroll.txt"
    prefix_test = user_name + "\\wav\\"
    # ead = input("Enter email address so that an otp can be sent:")
    # pwd = input("enter your mails password")
    messg = rand_phrase_test()
    # sendotph(ead, pwd, messg)
    rt = input(
        "Are you ready to validate? hit 'y' if you have the key-phrase sent via email"
    )
    if rt == 'y':
        print("say your key-phrase '" + messg + "'")
        # Record the validation sample and register it in the test list.
        f_name1 = user_name + "test"
        record_audio(f_name1, dir_test)
        edit_txt(f_name1, test_file, prefix_test)
        gmm_files = gmm_dir + user_name + ".gmm"
        # Load this user's enrolled Gaussian model.
        models = cpk.load(open(gmm_files, 'rb'))
        file_paths_orig = open(enroll_file, 'r')
        logsum = 0
        log_likelihood_orig = np.zeros(1)
        # Re-score the original enrollment recordings to establish the
        # user's baseline average log-likelihood.
        for path_orig in file_paths_orig:
            path_orig = path_orig.strip()
            print(path_orig)
            (sro, audioo) = read(source_enroll + path_orig)
            vector_o = extract_features(audioo, sro)
            gmm = models  # checking with each model one by one
            scores = np.array(gmm.score(vector_o))
            log_likelihood_orig[0] = scores.sum()
            print(log_likelihood_orig[0])
            logsum = logsum + log_likelihood_orig[0]
        # Average over the 5 enrollment recordings.
        logavg = logsum / 5
        print(logavg)
        # uncomment later
        file_paths1 = open(test_file, 'r')
        # Score the freshly recorded validation sample(s).
        for path1 in file_paths1:
            print(path1[:-1])
            path1 = path1[:-1]
            (sr, audio) = read(source_test + path1)
            vector = extract_features(audio, sr)
            gmm = models  # checking with each model one by one
            scores = np.array(gmm.score(vector))
            log_likelihood[0] = scores.sum()
            print(log_likelihood[0])
            #uncomment for reference
            # Accept when the enrolled-average-to-sample score ratio
            # exceeds 85%.
            if ((logavg / log_likelihood[0]) * 100) > 85:
                print("legit user")
            else:
                print("Invalid user")
def enroll(user_name):
    """Enroll a new speaker: record five key-phrase samples, extract
    MFCC & delta-MFCC features, fit an 8-component diagonal GMM, and
    pickle it to the user's enrollment directory."""
    source_enroll = "..\\Enroll\\"
    os.makedirs("..\\Enroll\\" + user_name + "\\wav\\")
    train_dir = "..\\Enroll\\" + user_name + "\\wav\\"
    gmm_dir = "..\\Enroll\\" + user_name + "\\"
    prefix_train = user_name + "\\wav\\"
    # Create (truncate) the enrollment list; close the handle right away
    # (the original left it dangling).
    f = open("..\\Enroll\\" + user_name + "\\enroll.txt", 'w')
    f.close()
    enroll_file = "..\\Enroll\\" + user_name + "\\enroll.txt"
    print("Welcome" + user_name)
    # Record five trials, each with a randomly chosen key-phrase.
    for i in range(5):
        trial = "trial" + str(i + 1)
        print(trial)
        key_phrase = rand_phrase_train()
        print("Please say '" + key_phrase +
              "' once the system starts listening")
        time.sleep(2)
        f_name = user_name + str(i + 1)
        record_audio(f_name, train_dir)
        edit_txt(f_name, enroll_file, prefix_train)
    file_paths = open(enroll_file, 'r')
    count = 1
    features = np.asarray(())
    for path in file_paths:
        path = path.strip()
        print(path)
        # read the audio
        sr, audio = read(source_enroll + path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # When features of all 5 files are concatenated, train the model.
        if count == 5:
            gmm = GMM(n_components=8,
                      n_iter=200,
                      covariance_type='diag',
                      n_init=3)
            gmm.fit(features)
            picklefile = user_name + ".gmm"
            # Dump the trained model, closing the handle deterministically.
            with open(gmm_dir + picklefile, 'wb') as fh:
                cpk.dump(gmm, fh)
            print('+ modeling completed for speaker:', picklefile,
                  " with data point = ", features.shape)
            features = np.asarray(())
            count = 0
        count = count + 1
    file_paths.close()
def enroll(user_name):
    """Enroll a speaker: record three key-phrase trials, fit a
    16-component diagonal GMM on their stacked features, then re-score
    the enrollment recordings to compute and return the user's average
    log-likelihood (also stored in the module-level ``logavg``)."""
    source = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\"
    os.makedirs(
        "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\" +
        user_name + "\\wav\\")
    train_dir = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\" + user_name + "\\wav\\"
    gmm_dir = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\" + user_name + "\\"
    prefix_train = user_name + "\\wav\\"
    # Create (truncate) the enrollment list for this user.
    f = open(
        "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\" +
        user_name + "\\enroll.txt", 'w')
    enroll_file = "C:\\Users\\Tilak1114\\Desktop\\DEVHACK2018\\22apr2018\\Enroll\\" + user_name + "\\enroll.txt"
    global logavg
    print("Welcome" + user_name)
    # Record three trials, each with a randomly chosen key-phrase.
    for i in range(3):
        trial = "trial" + str(i + 1)
        print(trial)
        key_phrase = rand_phrase()
        print("Please say '" + key_phrase +
              "' once the system starts listening")
        time.sleep(2)
        f_name = user_name + str(i + 1)
        record_audio(f_name, train_dir)
        edit_txt(f_name, enroll_file, prefix_train)
    # Two independent handles: one for training, one for re-scoring below.
    file_paths = open(enroll_file, 'r')
    file_paths1 = open(enroll_file, 'r')
    count = 1
    features = np.asarray(())
    for path in file_paths:
        path = path.strip()
        print(path)
        # read the audio
        sr, audio = read(source + path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # when features of 3 files of speaker are concatenated, then do model training
        if count == 3:
            gmm = GMM(n_components=16,
                      n_iter=200,
                      covariance_type='diag',
                      n_init=3)
            gmm.fit(features)
            # dumping the trained gaussian model
            picklefile = user_name + ".gmm"
            cpk.dump(gmm, open(gmm_dir + picklefile, 'wb'))
            print('+ modeling completed for speaker:', picklefile,
                  " with data point = ", features.shape)
            features = np.asarray(())
            count = 0
        count = count + 1
    # Re-load the model and score each enrollment recording to establish
    # the user's baseline average log-likelihood.
    gmm_files = gmm_dir + user_name + ".gmm"
    models = cpk.load(open(gmm_files, 'rb'))
    logsum = 0
    for path1 in file_paths1:
        path1 = path1.strip()
        print(path1)
        (sro, audioo) = read(source + path1)
        vector_o = extract_features(audioo, sro)
        log_likelihood_orig = np.zeros(1)
        gmm = models  # checking with each model one by one
        scores = np.array(gmm.score(vector_o))
        log_likelihood_orig[0] = scores.sum()
        print(log_likelihood_orig[0])
        logsum = logsum + log_likelihood_orig[0]
    # Average over the 3 enrollment recordings.
    logavg = logsum / 3
    print(logavg)
    del_txt(enroll_file)
    return logavg
# NOTE(review): fragment -- this code starts mid-function (its `def` line is
# not visible here) and references `train_file`, `source` and `train_num`
# defined earlier; it also cuts off right after computing `picklefile`.
inputFile(train_file, source, train_num)
file_paths = open(train_file, 'r')
count = 1
# Extracting features for each speaker (13 files per speaker).
features = np.asarray(())
for path in file_paths:
    path = path.strip()
    print(path)
    # read the audio
    sr, audio = read(source + path)
    # extract 40 dimensional MFCC & delta MFCC features
    vector = extract_features(audio, sr)
    if features.size == 0:
        features = vector
    else:
        features = np.vstack((features, vector))
    # When the features of 13 files of a speaker are concatenated, train.
    if count == 13:
        gmm = GaussianMixture(
            n_components=16, covariance_type='diag', n_init=3
        )  # n_components = number of mixture components, n_init = restarts
        # Estimate the model parameters from the stacked features via EM.
        gmm.fit(features)
        # dumping the trained gaussian model
        picklefile = path.split(".wav")[0] + ".gmm"
def run():
    """Loop: record a voice command, identify which command-GMM it matches
    best, and launch the corresponding desktop action, then exit."""
    source = "test/"
    modelpath = "models/"
    test_file = "test.txt"
    file_paths = open(test_file, 'r')
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    # Load every command model, closing each file after reading (the
    # original opened them and never closed a single handle).
    models = []
    for gmm_path in gmm_files:
        with open(gmm_path, 'rb') as fh:
            models.append(cPickle.load(fh))
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]
    print("Start. ")
    last = []
    while True:
        # Record a fresh command sample to test.wav.
        test = CreateRecord.Record()
        test.createRecord("test.wav")
        path = "test.wav"
        sr, audio = read(path)
        vector = extract_features(audio, sr)
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):
            scores = np.array(gmm.score(vector))
            log_likelihood[i] = scores.sum()
        winner = np.argmax(log_likelihood)
        # Dispatch on the recognized command name.
        if speakers[winner] == "motrinhduyet":
            webbrowser.get(
                "C:/Program Files (x86)/Internet Explorer/iexplore.exe %s"
            ).open("http://Google.com.vn")
            break
        elif speakers[winner] == "excel":
            os.startfile(
                'C:/Program Files (x86)/Microsoft Office/root/Office16/EXCEL.EXE'
            )
            break
        elif speakers[winner] == "khoamanhinh":
            ctypes.windll.user32.LockWorkStation()
            break
        # elif speakers[winner] == "tatnguon":
        #     os.system("shutdown /s /t 3600")
        #     break
        elif speakers[winner] == "24h":
            webbrowser.get(
                "C:/Program Files (x86)/Internet Explorer/iexplore.exe %s"
            ).open("http://24h.com.vn")
            break
        elif speakers[winner] == "dung":
            break