def train_and_dump(dirs, start, end, output_model, features_save):
    """Enroll every wav under dirs[start:end], then train and pickle the model.

    Each directory's basename is used as the speaker label. When
    `features_save` is set, the extracted MFCC features are pickled there
    before training.
    """
    model = ModelInterface()
    for directory in dirs[start:end]:
        speaker = os.path.basename(directory.rstrip('/'))
        wav_paths = glob.glob(directory + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % (directory))
            continue
        for wav_path in wav_paths:
            # A broken wav is reported but does not abort the batch.
            try:
                rate, samples = read_wav(wav_path)
                model.enroll(speaker, rate, samples)
            except Exception as err:
                print(wav_path + " error %s" % (err))
    print("The group wav files has been enrolled")
    # Optionally persist the raw MFCC features before training.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)
    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
def task_enroll(input_dirs, output_model):
    """Enroll all wavs found under the given directory pattern(s), train the
    model and pickle it to `output_model`.

    `input_dirs` is a whitespace-separated string of glob patterns; each
    matched directory's basename becomes the speaker label.
    """
    model = ModelInterface()
    # Expand '~' in each pattern, glob them all, keep only real directories.
    patterns = [os.path.expanduser(p) for p in input_dirs.strip().split()]
    candidates = itertools.chain(*(glob.glob(p) for p in patterns))
    dir_list = [c for c in candidates if os.path.isdir(c)]
    files = []  # NOTE(review): unused in the original; kept for fidelity
    if not dir_list:
        print("No valid directory found!")
        sys.exit(1)
    for directory in dir_list:
        speaker = os.path.basename(directory.rstrip('/'))
        wav_paths = glob.glob(directory + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % (directory))
            continue
        for wav_path in wav_paths:
            # A broken wav is reported but does not abort the batch.
            try:
                rate, samples = read_wav(wav_path)
                model.enroll(speaker, rate, samples)
                print("wav %s has been enrolled" % (wav_path))
            except Exception as err:
                print(wav_path + " error %s" % (err))
    model.train()
    model.dump(output_model)
def train(train_data_dir, model_path):
    """Enroll every wav in `train_data_dir` and dump the trained model.

    File names are expected to look like '<label>_<anything>.wav'; the text
    before the first underscore is used as the speaker label.

    Args:
        train_data_dir: directory holding the training wav files.
        model_path: path the trained model is pickled to.
    """
    m = ModelInterface()
    files = [f for f in os.listdir(train_data_dir) if re.search(r"\.wav", f)]
    for f in files:
        # BUG FIX: split only on the first underscore — the original
        # `f.split("_")` raised ValueError for names containing more than
        # one underscore (e.g. 'john_doe_1.wav').
        label, _ = f.split("_", 1)
        # Renamed from `file` to avoid shadowing the builtin.
        path = os.path.join(train_data_dir, f)
        try:
            fs, signal = read_wav(path)
            m.enroll(label, fs, signal)
            logger.info("wav %s has been enrolled" % (path))
        except Exception as e:
            logger.info(path + " error %s" % (e))
    m.train()
    m.dump(model_path)
def train():
    """Train a GMM model on the fixed dataset layout under data/train/.

    Each class contributes seven fixed utterances ('a'..'g'); files are
    expected directly inside the train directory, named
    '<class><letter>.wav' (no separator between class and letter).
    """
    model = ModelInterface()
    base_dir = 'data/train/'
    train_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S',
                   'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S',
                   'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S', 'MLKH_S',
                   'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S', 'MPRA_S',
                   'MREM_S', 'MTLS_S']
    file_name = ['a.wav', 'b.wav', 'c.wav', 'd.wav', 'e.wav', 'f.wav', 'g.wav']
    for speaker in train_class:
        for utterance in file_name:
            # Path is intentionally '<dir><class><letter>.wav' — no '/'
            # between class and file name, matching the dataset naming.
            rate, samples = wavfile.read(base_dir + speaker + utterance)
            model.enroll(speaker, rate, samples)
    model.train()
    model.dump('model/model.out')
def task_train_single(wav_url, person_id):
    """Download one training wav for `person_id`, enroll all of that person's
    wavs and train/dump their single model.

    Args:
        wav_url: URL of the wav file to download for this person.
        person_id: speaker identifier; also used as directory and label.

    Returns:
        (status, message) tuple: ('success', '') or ('fail', reason).

    NOTE(review): `model` and `train_voice_dir` are module-level globals —
    confirm they are defined in this module.
    """
    # Load the existing model if present, otherwise start fresh.
    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()
    if person_id in m.features:
        # (typo 'aleady' fixed in the message)
        return 'fail', 'already exist'
    # Download the training voice file into this person's directory.
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    # BUG FIX: the original format "%Y%m%d%H%I%S" used %I (12-hour clock)
    # where minutes were intended — timestamps carried the hour twice and
    # never the minutes. %M is the correct directive.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'
    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)
    # Gather every wav downloaded so far for this person.
    wavs = glob.glob(dest_dir + '/*.wav')
    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'
    # Enroll each wav; a broken file is reported but does not abort.
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % (wav))
        except Exception as e:
            print(wav + " error %s" % (e))
    m.train_single(person_id)
    m.dump(model)
    return 'success', ''
def task_enroll():
    """Enroll every .wav directly inside ./trainSounds (file stem = label),
    then train the model and dump it to 'data.bin'.

    Note: training and dumping still run even when no wav was found,
    matching the original behavior.
    """
    train_sounds_path = os.path.join(os.getcwd(), "trainSounds")
    entries = os.listdir(train_sounds_path)
    wav_names = []
    if not entries:
        print('Directory empty')
    else:
        # Keep only the .wav files (case-insensitive extension match).
        wav_names = [e for e in entries
                     if os.path.splitext(e)[-1].lower() == '.wav']
    model = ModelInterface()
    for wav_name in wav_names:
        rate, samples = read_wav(os.path.join(train_sounds_path, wav_name))
        speaker = os.path.splitext(wav_name)[0]
        model.enroll(speaker, rate, samples)
        print(speaker + ' enrolled')
    model.train()
    model.dump('data.bin')
def task_train_full(input_dirs, output_model):
    """Enroll every wav under each sub-directory of `input_dirs`, run a full
    training pass and dump the model.

    Each sub-directory's basename is the speaker label.

    Returns:
        (status, message) tuple: ('success', '') or ('fail', reason).
    """
    model = ModelInterface()
    # Collect all directories; os.walk yields the root itself first, so drop
    # it (raises IndexError if `input_dirs` does not exist, as before).
    train_dir = [walk_entry[0] for walk_entry in os.walk(input_dirs)]
    del train_dir[0]
    files = []  # NOTE(review): unused in the original; kept for fidelity
    if len(train_dir) == 0:
        print("No valid directory found!")
        return 'fail', 'No valid directory found!'
    for directory in train_dir:
        speaker = os.path.basename(directory.rstrip('/'))
        wav_paths = glob.glob(directory + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % (directory))
            continue
        for wav_path in wav_paths:
            # A broken wav is reported but does not abort the batch.
            try:
                rate, samples = utils.read_wav(wav_path)
                model.enroll(speaker, rate, samples)
                print("wav %s has been enrolled" % (wav_path))
            except Exception as err:
                print(wav_path + " error %s" % (err))
    model.train_full()
    model.dump(output_model)
    return 'success', ''
def task_enroll(input_dirs, output_model, features_save=None, group_person_num=None):
    """Enroll every wav found under the given directory pattern(s), train the
    model and pickle it to `output_model`.

    Args:
        input_dirs: whitespace-separated glob pattern(s) of speaker
            directories; each directory basename becomes the speaker label.
        output_model: path the trained model is pickled to.
        features_save: optional path; when set, the raw MFCC features are
            pickled there before training.
        group_person_num: unused here; kept for interface compatibility.
    """
    model = ModelInterface()
    # Expand '~' in each pattern, glob them, keep only real directories.
    patterns = [os.path.expanduser(p) for p in input_dirs.strip().split()]
    matched = itertools.chain(*(glob.glob(p) for p in patterns))
    speaker_dirs = [d for d in matched if os.path.isdir(d)]
    for directory in speaker_dirs:
        speaker = os.path.basename(directory.rstrip('/'))
        wav_paths = glob.glob(directory + '/*.wav')
        if not wav_paths:
            print("No wav file found in %s" % (directory))
            continue
        for wav_path in wav_paths:
            # A broken wav is reported but does not abort the batch.
            try:
                rate, samples = read_wav(wav_path)
                model.enroll(speaker, rate, samples)
            except Exception as err:
                print(wav_path + " error %s" % (err))
    print("The wav files has been enrolled")
    # Optionally persist the MFCC features before training.
    if features_save:
        model.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)
    model.train()
    model.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
class Main(QMainWindow):
    """Main GUI window for the speaker-recognition demo (PyQt, Python 2).

    Wires UI buttons to recording / enrollment / recognition actions, holds
    the ModelInterface backend, and manages user metadata plus avatars.
    """

    # Conversation mode: predict every CONV_INTERVAL seconds over a sliding
    # window of the last CONV_DURATION seconds of audio.
    CONV_INTERVAL = 0.4
    CONV_DURATION = 1.5
    CONV_FILTER_DURATION = CONV_DURATION
    FS = 8000  # recording sample rate (Hz)
    TEST_DURATION = 3

    def __init__(self, parent=None):
        """Load the .ui file, connect all signals and set up default state."""
        QWidget.__init__(self, parent)
        uic.loadUi("edytor.ui", self)
        self.statusBar()
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.timer_callback)
        self.noiseButton.clicked.connect(self.noise_clicked)
        self.recording_noise = False
        self.loadNoise.clicked.connect(self.load_noise)
        self.enrollRecord.clicked.connect(self.start_enroll_record)
        self.stopEnrollRecord.clicked.connect(self.stop_enroll_record)
        self.enrollFile.clicked.connect(self.enroll_file)
        self.enroll.clicked.connect(self.do_enroll)
        self.startTrain.clicked.connect(self.start_train)
        self.dumpBtn.clicked.connect(self.dump)
        self.loadBtn.clicked.connect(self.load)
        self.recoRecord.clicked.connect(self.start_reco_record)
        self.stopRecoRecord.clicked.connect(self.stop_reco_record)
        # self.newReco.clicked.connect(self.new_reco)
        self.recoFile.clicked.connect(self.reco_file)
        self.recoInputFiles.clicked.connect(self.reco_files)
        # UI init: user metadata and its chooser widget
        self.userdata = []
        self.loadUsers()
        self.Userchooser.currentIndexChanged.connect(self.showUserInfo)
        self.ClearInfo.clicked.connect(self.clearUserInfo)
        self.UpdateInfo.clicked.connect(self.updateUserInfo)
        self.UploadImage.clicked.connect(self.upload_avatar)
        # "recording" animation, shared by the three animation labels
        self.movie = QMovie(u"image/recording.gif")
        self.movie.start()
        self.movie.stop()
        self.Animation.setMovie(self.movie)
        self.Animation_2.setMovie(self.movie)
        self.Animation_3.setMovie(self.movie)
        self.aladingpic = QPixmap(u"image/a_hello.png")
        self.Alading.setPixmap(self.aladingpic)
        self.Alading_conv.setPixmap(self.aladingpic)
        # default user image setting
        self.avatarname = "image/nouser.jpg"
        self.defaultimage = QPixmap(self.avatarname)
        self.Userimage.setPixmap(self.defaultimage)
        self.recoUserImage.setPixmap(self.defaultimage)
        self.convUserImage.setPixmap(self.defaultimage)
        self.load_avatar('avatar/')
        # Graph window init
        self.graphwindow = GraphWindow()
        self.newname = ""
        self.lastname = ""
        self.Graph_button.clicked.connect(self.graphwindow.show)
        self.convRecord.clicked.connect(self.start_conv_record)
        self.convStop.clicked.connect(self.stop_conv)
        self.backend = ModelInterface()
        # debug shortcut
        QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug)
        # init: preload background noise profile if bg.wav exists
        # NOTE(review): bare except silently swallows any failure here.
        try:
            fs, signal = read_wav("bg.wav")
            self.backend.init_noise(fs, signal)
        except:
            pass

    ############ RECORD
    def start_record(self):
        """Open a PyAudio input stream and start the recorder thread."""
        self.pyaudio = pyaudio.PyAudio()
        self.status("Recording...")
        self.movie.start()
        self.Alading.setPixmap(QPixmap(u"image/a_thinking.png"))
        self.recordData = []
        # NOTE(review): frames_per_buffer=1 means one buffer per sample —
        # confirm this is intentional; it is very CPU-hungry.
        self.stream = self.pyaudio.open(format=FORMAT, channels=1,
                                        rate=Main.FS, input=True,
                                        frames_per_buffer=1)
        self.stopped = False
        self.reco_th = RecorderThread(self)
        self.reco_th.start()
        self.timer.start(1000)
        self.record_time = 0
        self.update_all_timer()

    def add_record_data(self, i):
        # Called by RecorderThread for each captured chunk; returning
        # self.stopped tells the thread when to quit.
        self.recordData.append(i)
        return self.stopped

    def timer_callback(self):
        # 1 Hz UI tick while recording.
        self.record_time += 1
        self.status("Recording..." + time_str(self.record_time))
        self.update_all_timer()

    def stop_record(self):
        """Stop the recorder thread and tear down the audio stream."""
        self.movie.stop()
        self.stopped = True
        self.reco_th.wait()
        self.timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()
        self.status("Record stopeed")  # NOTE(review): typo in UI text ("stopeed")

    ############## conversation
    def start_conv_record(self):
        """Start continuous recognition: predict every CONV_INTERVAL seconds."""
        self.conv_result_list = []
        self.start_record()
        self.conv_now_pos = 0
        self.conv_timer = QTimer(self)
        self.conv_timer.timeout.connect(self.do_conversation)
        self.conv_timer.start(Main.CONV_INTERVAL * 1000)
        # reset the live graph
        self.graphwindow.wid.reset()

    def stop_conv(self):
        self.stop_record()
        self.conv_timer.stop()

    def do_conversation(self):
        """Timer slot: denoise the last CONV_DURATION seconds and predict."""
        interval_len = int(Main.CONV_INTERVAL * Main.FS)
        segment_len = int(Main.CONV_DURATION * Main.FS)
        self.conv_now_pos += interval_len
        to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]): self.conv_now_pos]
        signal = np.array(to_filter, dtype=NPDtype)
        label = None
        try:
            signal = self.backend.filter(Main.FS, signal)
            # skip segments that are almost entirely silence
            if len(signal) > 50:
                label = self.backend.predict(Main.FS, signal, True)
        except Exception as e:
            print traceback.format_exc()
            print str(e)
        global last_label_to_show
        label_to_show = label
        # Debounce: when the prediction flips against the previous one, keep
        # showing the last displayed label instead.
        if label and self.conv_result_list:
            last_label = self.conv_result_list[-1]
            if last_label and last_label != label:
                label_to_show = last_label_to_show
        self.conv_result_list.append(label)
        print label_to_show, "label to show"
        last_label_to_show = label_to_show
        # ADD FOR GRAPH: feed the name timeline shown in the graph window
        if label_to_show is None:
            label_to_show = 'Nobody'
        if len(NAMELIST) and NAMELIST[-1] != label_to_show:
            NAMELIST.append(label_to_show)
        self.convUsername.setText(label_to_show)
        self.Alading_conv.setPixmap(QPixmap(u"image/a_result.png"))
        self.convUserImage.setPixmap(self.get_avatar(label_to_show))

    ###### RECOGNIZE
    def start_reco_record(self):
        # NOTE(review): u"image/a_hello" lacks the ".png" suffix used elsewhere.
        self.Alading.setPixmap(QPixmap(u"image/a_hello"))
        self.recoRecordData = np.array((), dtype=NPDtype)
        self.start_record()

    def stop_reco_record(self):
        self.stop_record()
        signal = np.array(self.recordData, dtype=NPDtype)
        self.reco_remove_update(Main.FS, signal)

    def reco_do_predict(self, fs, signal):
        """Predict the speaker of `signal` and show their name and avatar."""
        label = self.backend.predict(fs, signal)
        if not label:
            label = "Nobody"
        print label
        self.recoUsername.setText(label)
        self.Alading.setPixmap(QPixmap(u"image/a_result.png"))
        self.recoUserImage.setPixmap(self.get_avatar(label))
        # TODO To Delete
        write_wav('reco.wav', fs, signal)

    def reco_remove_update(self, fs, signal):
        """Denoise `signal`, append it to the buffer and run a prediction."""
        new_signal = self.backend.filter(fs, signal)
        print "After removed: {0} -> {1}".format(len(signal), len(new_signal))
        self.recoRecordData = np.concatenate((self.recoRecordData, new_signal))
        # Percentage of TEST_DURATION collected so far, capped at 100.
        # NOTE(review): real_len is computed but never used afterwards.
        real_len = float(len(self.recoRecordData)) / Main.FS / Main.TEST_DURATION * 100
        if real_len > 100:
            real_len = 100
        self.reco_do_predict(fs, self.recoRecordData)

    def reco_file(self):
        """Recognize the speaker of a single user-chosen wav file."""
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)")
        print 'reco_file'
        if not fname:
            return
        self.status(fname)
        fs, signal = read_wav(fname)
        self.reco_do_predict(fs, signal)

    def reco_files(self):
        """Batch-recognize several wav files, printing one label per file."""
        fnames = QFileDialog.getOpenFileNames(self, "Select Wav Files", "", "Files (*.wav)")
        print 'reco_files'
        for f in fnames:
            fs, sig = read_wav(f)
            newsig = self.backend.filter(fs, sig)
            label = self.backend.predict(fs, newsig)
            print f, label

    ########## ENROLL
    def start_enroll_record(self):
        self.enrollWav = None
        self.enrollFileName.setText("")
        self.start_record()

    def enroll_file(self):
        """Pick a wav file and stage it (mono) as the pending enrollment."""
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)")
        if not fname:
            return
        self.status(fname)
        self.enrollFileName.setText(fname)
        fs, signal = read_wav(fname)
        signal = monophonic(signal)
        self.enrollWav = (fs, signal)

    def stop_enroll_record(self):
        """Stop recording and stage the captured audio for enrollment."""
        self.stop_record()
        print self.recordData[:300]
        signal = np.array(self.recordData, dtype=NPDtype)
        self.enrollWav = (Main.FS, signal)
        # TODO To Delete
        write_wav('enroll.wav', *self.enrollWav)

    def do_enroll(self):
        """Denoise the staged wav and enroll it under the typed user name."""
        name = self.Username.text().trimmed()
        if not name:
            self.warn("Please Input Your Name")
            return
        # self.addUserInfo()
        new_signal = self.backend.filter(*self.enrollWav)
        print "After removed: {0} -> {1}".format(len(self.enrollWav[1]), len(new_signal))
        print "Enroll: {:.4f} seconds".format(float(len(new_signal)) / Main.FS)
        # Denoising can strip everything; refuse to enroll silence.
        if len(new_signal) == 0:
            print "Error! Input is silent! Please enroll again"
            return
        self.backend.enroll(name, Main.FS, new_signal)

    def start_train(self):
        self.status("Training...")
        self.backend.train()
        self.status("Training Done.")

    ####### UI related
    def getWidget(self, splash):
        # Busy-wait ~800 ms while updating the splash message.
        # NOTE(review): local name `str` shadows the builtin.
        t = QtCore.QElapsedTimer()
        t.start()
        while (t.elapsed() < 800):
            str = QtCore.QString("times = ") + QtCore.QString.number(t.elapsed())
            splash.showMessage(str)
            QtCore.QCoreApplication.processEvents()

    def upload_avatar(self):
        """Choose a jpg to use as the current user's avatar."""
        fname = QFileDialog.getOpenFileName(self, "Open JPG File", "", "File (*.jpg)")
        if not fname:
            return
        self.avatarname = fname
        self.Userimage.setPixmap(QPixmap(fname))

    def loadUsers(self):
        """Read avatar/metainfo.txt (whitespace-separated fields) into
        self.userdata and populate the user chooser."""
        with open("avatar/metainfo.txt") as db:
            for line in db:
                tmp = line.split()
                self.userdata.append(tmp)
                self.Userchooser.addItem(tmp[0])

    def showUserInfo(self):
        """Fill the form with the record matching the chooser selection."""
        for user in self.userdata:
            if self.userdata.index(user) == self.Userchooser.currentIndex() - 1:
                self.Username.setText(user[0])
                self.Userage.setValue(int(user[1]))
                if user[2] == 'F':
                    self.Usersex.setCurrentIndex(1)
                else:
                    self.Usersex.setCurrentIndex(0)
                self.Userimage.setPixmap(self.get_avatar(user[0]))

    def updateUserInfo(self):
        """Write the form values into the selected record and re-save the
        whole metainfo file."""
        userindex = self.Userchooser.currentIndex() - 1
        # NOTE(review): `self.serdata` is almost certainly a typo for
        # `self.userdata` — this line raises AttributeError as written.
        u = self.serdata[userindex]
        u[0] = unicode(self.Username.displayText())
        u[1] = self.Userage.value()
        if self.Usersex.currentIndex():
            u[2] = 'F'
        else:
            u[2] = 'M'
        with open("avatar/metainfo.txt","w") as db:
            for user in self.userdata:
                for i in range(3):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def writeuserdata(self):
        # NOTE(review): writes 4 fields per user here vs. 3 in
        # updateUserInfo — confirm which record width is correct.
        with open("avatar/metainfo.txt","w") as db:
            for user in self.userdata:
                for i in range (0,4):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def clearUserInfo(self):
        """Reset the user form to its defaults."""
        self.Username.setText("")
        self.Userage.setValue(0)
        self.Usersex.setCurrentIndex(0)
        self.Userimage.setPixmap(self.defaultimage)

    def addUserInfo(self):
        """Append the form values as a new user record (no-op if the name
        already exists)."""
        for user in self.userdata:
            if user[0] == unicode(self.Username.displayText()):
                return
        newuser = []
        newuser.append(unicode(self.Username.displayText()))
        newuser.append(self.Userage.value())
        if self.Usersex.currentIndex():
            newuser.append('F')
        else:
            newuser.append('M')
        if self.avatarname:
            # NOTE(review): `user` here is the last record left over from the
            # loop above — probably should be the new user's name instead.
            shutil.copy(self.avatarname, 'avatar/' + user[0] + '.jpg')
        self.userdata.append(newuser)
        self.writeuserdata()
        self.Userchooser.addItem(unicode(self.Username.displayText()))

    ############# UTILS
    def warn(self, s):
        QMessageBox.warning(self, "Warning", s)

    def status(self, s=""):
        self.statusBar().showMessage(s)

    def update_all_timer(self):
        # Mirror the elapsed-time string into all three tab labels.
        s = time_str(self.record_time)
        self.enrollTime.setText(s)
        self.recoTime.setText(s)
        self.convTime.setText(s)

    def dump(self):
        """Pickle the backend to a user-chosen file."""
        fname = QFileDialog.getSaveFileName(self, "Save Data to:", "", "")
        if fname:
            try:
                self.backend.dump(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Dumped to file: " + fname)

    def load(self):
        """Replace the backend with one loaded from a user-chosen file."""
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
        if fname:
            try:
                self.backend = ModelInterface.load(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Loaded from file: " + fname)

    def noise_clicked(self):
        """Toggle background-noise recording; on stop, save bg.wav and feed
        the noise profile to the backend."""
        self.recording_noise = not self.recording_noise
        if self.recording_noise:
            self.noiseButton.setText('Stop Recording Noise')
            self.start_record()
        else:
            self.noiseButton.setText('Recording Background Noise')
            self.stop_record()
            signal = np.array(self.recordData, dtype=NPDtype)
            wavfile.write("bg.wav", Main.FS, signal)
            self.backend.init_noise(Main.FS, signal)

    def load_noise(self):
        """Load a noise profile for the backend from a wav file."""
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "Wav File (*.wav)")
        if fname:
            fs, signal = read_wav(fname)
            self.backend.init_noise(fs, signal)

    def load_avatar(self, dirname):
        """Cache a QPixmap for every jpg in `dirname`, keyed by file stem."""
        self.avatars = {}
        for f in glob.glob(dirname + '/*.jpg'):
            name = os.path.basename(f).split('.')[0]
            print f, name
            self.avatars[name] = QPixmap(f)

    def get_avatar(self, username):
        """Return the cached avatar for `username`, or the default image."""
        p = self.avatars.get(str(username), None)
        if p:
            return p
        else:
            return self.defaultimage

    def printDebug(self):
        # Ctrl+P: dump enrolled feature counts and GMM count to stdout.
        for name, feat in self.backend.features.iteritems():
            print name, len(feat)
        print "GMMs",
        print len(self.backend.gmmset.gmms)

# The triple-quoted block that follows is a commented-out (stringified)
# duplicate of this class, kept as-is.
'''
class Main(QMainWindow): CONV_INTERVAL = 0.4 CONV_DURATION = 1.5 CONV_FILTER_DURATION = CONV_DURATION FS = 8000 TEST_DURATION = 3 def __init__(self, parent=None): QWidget.__init__(self, parent) uic.loadUi("edytor.ui", self) self.statusBar() self.timer = QTimer(self) self.timer.timeout.connect(self.timer_callback) self.noiseButton.clicked.connect(self.noise_clicked) self.recording_noise = False self.loadNoise.clicked.connect(self.load_noise) self.enrollRecord.clicked.connect(self.start_enroll_record) self.stopEnrollRecord.clicked.connect(self.stop_enroll_record) self.enrollFile.clicked.connect(self.enroll_file) self.enroll.clicked.connect(self.do_enroll) self.startTrain.clicked.connect(self.start_train) self.dumpBtn.clicked.connect(self.dump) self.loadBtn.clicked.connect(self.load) self.recoRecord.clicked.connect(self.start_reco_record) self.stopRecoRecord.clicked.connect(self.stop_reco_record) # self.newReco.clicked.connect(self.new_reco) self.recoFile.clicked.connect(self.reco_file) self.recoInputFiles.clicked.connect(self.reco_files) #UI.init self.userdata = [] self.loadUsers() self.Userchooser.currentIndexChanged.connect(self.showUserInfo) self.ClearInfo.clicked.connect(self.clearUserInfo) self.UpdateInfo.clicked.connect(self.updateUserInfo) self.UploadImage.clicked.connect(self.upload_avatar) #movie test self.movie = QMovie(u"image/recording.gif") self.movie.start() self.movie.stop() self.Animation.setMovie(self.movie) self.Animation_2.setMovie(self.movie) self.Animation_3.setMovie(self.movie) self.aladingpic = QPixmap(u"image/a_hello.png") self.Alading.setPixmap(self.aladingpic) self.Alading_conv.setPixmap(self.aladingpic) #default user image setting self.avatarname = "image/nouser.jpg" self.defaultimage = QPixmap(self.avatarname) self.Userimage.setPixmap(self.defaultimage) self.recoUserImage.setPixmap(self.defaultimage) self.convUserImage.setPixmap(self.defaultimage) self.load_avatar('avatar/') # Graph Window init self.graphwindow = GraphWindow() self.newname 
= "" self.lastname = "" self.Graph_button.clicked.connect(self.graphwindow.show) self.convRecord.clicked.connect(self.start_conv_record) self.convStop.clicked.connect(self.stop_conv) self.backend = ModelInterface() # debug QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug) #init try: fs, signal = wavfile.read("bg.wav") self.backend.init_noise(fs, signal) except: pass ############ RECORD def start_record(self): self.pyaudio = pyaudio.PyAudio() self.status("Recording...") self.movie.start() self.Alading.setPixmap(QPixmap(u"image/a_thinking.png")) self.recordData = [] self.stream = self.pyaudio.open(format=FORMAT, channels=1, rate=Main.FS, input=True, frames_per_buffer=1) self.stopped = False self.reco_th = RecorderThread(self) self.reco_th.start() self.timer.start(1000) self.record_time = 0 self.update_all_timer() def add_record_data(self, i): self.recordData.append(i) return self.stopped def timer_callback(self): self.record_time += 1 self.status("Recording..." + time_str(self.record_time)) self.update_all_timer() def stop_record(self): self.movie.stop() self.stopped = True self.reco_th.wait() self.timer.stop() self.stream.stop_stream() self.stream.close() self.pyaudio.terminate() self.status("Record stopeed") ############## conversation def start_conv_record(self): self.conv_result_list = [] self.start_record() self.conv_now_pos = 0 self.conv_timer = QTimer(self) self.conv_timer.timeout.connect(self.do_conversation) self.conv_timer.start(Main.CONV_INTERVAL * 1000) #reset self.graphwindow.wid.reset() def stop_conv(self): self.stop_record() self.conv_timer.stop() def do_conversation(self): interval_len = int(Main.CONV_INTERVAL * Main.FS) segment_len = int(Main.CONV_DURATION * Main.FS) self.conv_now_pos += interval_len to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]):self.conv_now_pos] signal = np.array(to_filter, dtype=NPDtype) label = None try: signal = self.backend.filter(Main.FS, signal) if len(signal) > 50: label = 
self.backend.predict(Main.FS, signal, True) except Exception as e: print traceback.format_exc() print str(e) global last_label_to_show label_to_show = label if label and self.conv_result_list: last_label = self.conv_result_list[-1] if last_label and last_label != label: label_to_show = last_label_to_show self.conv_result_list.append(label) print label_to_show, "label to show" last_label_to_show = label_to_show #ADD FOR GRAPH if label_to_show is None: label_to_show = 'Nobody' if len(NAMELIST) and NAMELIST[-1] != label_to_show: NAMELIST.append(label_to_show) self.convUsername.setText(label_to_show) self.Alading_conv.setPixmap(QPixmap(u"image/a_result.png")) self.convUserImage.setPixmap(self.get_avatar(label_to_show)) ###### RECOGNIZE def start_reco_record(self): self.Alading.setPixmap(QPixmap(u"image/a_hello")) self.recoRecordData = np.array((), dtype=NPDtype) self.start_record() def stop_reco_record(self): self.stop_record() signal = np.array(self.recordData, dtype=NPDtype) self.reco_remove_update(Main.FS, signal) def reco_do_predict(self, fs, signal): label = self.backend.predict(fs, signal) if not label: label = "Nobody" print label self.recoUsername.setText(label) self.Alading.setPixmap(QPixmap(u"image/a_result.png")) self.recoUserImage.setPixmap(self.get_avatar(label)) # TODO To Delete write_wav('reco.wav', fs, signal) def reco_remove_update(self, fs, signal): new_signal = self.backend.filter(fs, signal) print "After removed: {0} -> {1}".format(len(signal), len(new_signal)) self.recoRecordData = np.concatenate((self.recoRecordData, new_signal)) real_len = float(len( self.recoRecordData)) / Main.FS / Main.TEST_DURATION * 100 if real_len > 100: real_len = 100 self.reco_do_predict(fs, self.recoRecordData) def reco_file(self): fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)") print 'reco_file' if not fname: return self.status(fname) fs, signal = wavfile.read(fname) self.reco_do_predict(fs, signal) def reco_files(self): fnames = 
QFileDialog.getOpenFileNames(self, "Select Wav Files", "", "Files (*.wav)") print 'reco_files' for f in fnames: fs, sig = wavfile.read(f) newsig = self.backend.filter(fs, sig) label = self.backend.predict(fs, newsig) print f, label ########## ENROLL def start_enroll_record(self): self.enrollWav = None self.enrollFileName.setText("") self.start_record() def enroll_file(self): fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)") if not fname: return self.status(fname) self.enrollFileName.setText(fname) fs, signal = wavfile.read(fname) signal = monophonic(signal) self.enrollWav = (fs, signal) def stop_enroll_record(self): self.stop_record() print self.recordData[:300] signal = np.array(self.recordData, dtype=NPDtype) self.enrollWav = (Main.FS, signal) # TODO To Delete write_wav('enroll.wav', *self.enrollWav) def do_enroll(self): name = self.Username.text().trimmed() if not name: self.warn("Please Input Your Name") return # self.addUserInfo() new_signal = self.backend.filter(*self.enrollWav) print "After removed: {0} -> {1}".format(len(self.enrollWav[1]), len(new_signal)) print "Enroll: {:.4f} seconds".format(float(len(new_signal)) / Main.FS) self.backend.enroll(name, Main.FS, new_signal) def start_train(self): self.status("Training...") self.backend.train() self.status("Training Done.") ####### UI related def getWidget(self, splash): t = QtCore.QElapsedTimer() t.start() while (t.elapsed() < 800): str = QtCore.QString("times = ") + QtCore.QString.number( t.elapsed()) splash.showMessage(str) QtCore.QCoreApplication.processEvents() def upload_avatar(self): fname = QFileDialog.getOpenFileName(self, "Open JPG File", "", "File (*.jpg)") if not fname: return self.avatarname = fname self.Userimage.setPixmap(QPixmap(fname)) def loadUsers(self): with open("avatar/metainfo.txt") as db: for line in db: tmp = line.split() self.userdata.append(tmp) self.Userchooser.addItem(tmp[0]) def showUserInfo(self): for user in self.userdata: if self.userdata.index( 
user) == self.Userchooser.currentIndex() - 1: self.Username.setText(user[0]) self.Userage.setValue(int(user[1])) if user[2] == 'F': self.Usersex.setCurrentIndex(1) else: self.Usersex.setCurrentIndex(0) self.Userimage.setPixmap(self.get_avatar(user[0])) def updateUserInfo(self): userindex = self.Userchooser.currentIndex() - 1 u = self.serdata[userindex] u[0] = unicode(self.Username.displayText()) u[1] = self.Userage.value() if self.Usersex.currentIndex(): u[2] = 'F' else: u[2] = 'M' with open("avatar/metainfo.txt", "w") as db: for user in self.userdata: for i in range(3): db.write(str(user[i]) + " ") db.write("\n") def writeuserdata(self): with open("avatar/metainfo.txt", "w") as db: for user in self.userdata: for i in range(0, 4): db.write(str(user[i]) + " ") db.write("\n") def clearUserInfo(self): self.Username.setText("") self.Userage.setValue(0) self.Usersex.setCurrentIndex(0) self.Userimage.setPixmap(self.defaultimage) def addUserInfo(self): for user in self.userdata: if user[0] == unicode(self.Username.displayText()): return newuser = [] newuser.append(unicode(self.Username.displayText())) newuser.append(self.Userage.value()) if self.Usersex.currentIndex(): newuser.append('F') else: newuser.append('M') if self.avatarname: shutil.copy(self.avatarname, 'avatar/' + user[0] + '.jpg') self.userdata.append(newuser) self.writeuserdata() self.Userchooser.addItem(unicode(self.Username.displayText())) ############# UTILS def warn(self, s): QMessageBox.warning(self, "Warning", s) def status(self, s=""): self.statusBar().showMessage(s) def update_all_timer(self): s = time_str(self.record_time) self.enrollTime.setText(s) self.recoTime.setText(s) self.convTime.setText(s) def dump(self): fname = QFileDialog.getSaveFileName(self, "Save Data to:", "", "") if fname: try: self.backend.dump(fname) except Exception as e: self.warn(str(e)) else: self.status("Dumped to file: " + fname) def load(self): fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "") if fname: 
try: self.backend = ModelInterface.load(fname) except Exception as e: self.warn(str(e)) else: self.status("Loaded from file: " + fname) def noise_clicked(self): self.recording_noise = not self.recording_noise if self.recording_noise: self.noiseButton.setText('Stop Recording Noise') self.start_record() else: self.noiseButton.setText('Recording Background Noise') self.stop_record() signal = np.array(self.recordData, dtype=NPDtype) wavfile.write("bg.wav", Main.FS, signal) self.backend.init_noise(Main.FS, signal) def load_noise(self): fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "Wav File (*.wav)") if fname: fs, signal = wavfile.read(fname) self.backend.init_noise(fs, signal) def load_avatar(self, dirname): self.avatars = {} for f in glob.glob(dirname + '/*.jpg'): name = os.path.basename(f).split('.')[0] print f, name self.avatars[name] = QPixmap(f) def get_avatar(self, username): p = self.avatars.get(str(username), None) if p: return p else: return self.defaultimage def printDebug(self): for name, feat in self.backend.features.iteritems(): print name, len(feat) print "GMMs", print len(self.backend.gmmset.gmms) '''
name = name.strip() print( f"Hello {name}. Please input your voice {args.num_samples} times") with tempfile.TemporaryDirectory() as tempdir: i = 1 while i <= args.num_samples: with sr.Microphone() as source: audio = r.listen(source) # Generate random filename filename = os.path.join( tempdir, name + "_" + str(uuid.uuid1()) + ".wav") with open(filename, "wb") as file: file.write(audio.get_wav_data(convert_rate=16000)) # enroll a file fs, signal = read_wav(filename) model.enroll(name, fs, signal) logger.info("wav file %s has been enrolled" % (filename)) i += 1 model.train() model.dump(args.model_path) else: model = ModelInterface.load(args.model_path) print("Please input your voice: ") with tempfile.TemporaryDirectory() as tempdir: with sr.Microphone() as source: audio = r.listen(source) filename = os.path.join(tempdir, str(uuid.uuid1()) + ".wav") with open(filename, "wb") as file: file.write(audio.get_wav_data(convert_rate=16000)) fs, signal = read_wav(filename)
def task_enroll(input_dirs, output_model, isDynamic):
    # Possible improvement: store the VAD signal of each .wav file instead of
    # computing it twice for the dynamic threshold (useless for static).
    """
    Enroll the speakers inside the GMM model.

    Args:
        input_dirs (string): directory (or whitespace-separated glob
            patterns) of the database; each directory basename is a label.
        output_model (string): path of the output model.
        isDynamic: when truthy, also compute a per-label dynamic threshold
            after training (warning: this re-scores every wav — slow).

    NOTE(review): `m` is not created here (the `m = ModelInterface()` line
    was commented out in the original) — it must be a module-level instance.
    """
    # Extract the absolute path(s) from 'input_dirs' and keep directories only.
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    dirs = [d for d in dirs if os.path.isdir(d)]
    if len(dirs) == 0:
        print('No valid directory found!')
        sys.exit(1)
    # Start the enrollment of the valid directories.
    start_time = time.time()
    print('Starting enrollment')
    for d in dirs:
        print(d)
        # The directory name is the speaker label.
        label = os.path.basename(d.rstrip('/'))
        print(label)
        wavs = glob.glob(d + '/*.wav')
        if len(wavs) == 0:
            print('No wav file found in %s' % (d))
            continue
        for wav in wavs:
            # Read, normalize to [-1, 1], strip silence (VAD), then enroll.
            try:
                fs, signal = read_wav(wav)
                signal = signal / max(abs(signal))
                m.enroll(label, fs, VAD_process(signal))
            except Exception as e:
                print(wav + ' error %s' % (e))
    # Train the model on the enrolled signals.
    print('Enrollment finished\nTraining started')
    m.train()
    print('Training finished')
    # Optional dynamic threshold computation.
    # /!\ Warning: this takes a lot of time /!\
    if (isDynamic):
        print('Dynamic Threshold started')
        i = 0
        for d in dirs:
            # BUG FIX: refresh the wav list for the *current* directory
            # before the emptiness check — the original tested the previous
            # iteration's (leaked) `wavs` list.
            wavs = glob.glob(d + '/*.wav')
            if len(wavs) == 0:
                print('No wav file found in %s' % (d))
                continue
            for wav in wavs:
                # Each wav is scored against every existing label.
                try:
                    fs, signal = read_wav(wav)
                    signal = signal / max(abs(signal))
                    m.dynamic_threshold(fs, VAD_process(signal))
                except Exception as e:
                    print(wav + ' error %s' % (e))
            i += 1
            # Coarse progress indicator every 10% of the directories.
            for j in range(0, 100, 10):
                if (i == int(len(dirs) * j * 0.01)):
                    print('%i percent done.' % (j))
        # Keep only the mean of all scores per label as the threshold.
        try:
            m.dynamic_mean()
        except Exception as e:
            # BUG FIX: '%e' cannot format an exception object (and the
            # 'dyanmic' typo is corrected).
            print('Error for dynamic threshold : error %s' % (e))
        print('Dynamic threshold finished')
    print(time.time() - start_time, " seconds")
    # Save the model at the specified path 'output_model'.
    m.dump(output_model)