def train_and_dump(dirs, start, end, output_model, features_save):
    m = ModelInterface()
    # Enroll every wav file from the directories in the [start:end) slice;
    # the directory basename is used as the speaker label.
    for d in dirs[start:end]:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')
        if len(wavs) == 0:
            print("No wav file found in %s" % d)
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
            except Exception as e:
                print(wav + " error %s" % e)
    print("The group's wav files have been enrolled")
    # If an MFCC feature dump path was given, save the features there
    if features_save:
        m.mfcc_dump(features_save)
        print("The features of this group's wav files have been pickle.dumped to %s"
              % features_save)
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped" % output_model)

def _cache(data: TextIO, model_name: Text, output: BinaryIO, **kwargs):
    cpu = require_device(prefer_cuda=False)
    model_type = models.select(model_name)
    model = ModelInterface(model_type, cpu, False)
    csv = util.load_csv(data)
    cache = {}
    for smiles in csv.keys():
        cache_key = (smiles, )  # memcached is indexed on the argument list
        # renamed from `data` to avoid shadowing the TextIO parameter
        processed = model.process(smiles)
        cache[cache_key] = model.encode_data(processed, **kwargs)
    pickle.dump(cache, output)

def task_enroll(input_dirs, output_model):
    m = ModelInterface()
    # Split the space-separated directory patterns, expand them,
    # and keep only the paths that are actually directories.
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    dirs = [d for d in dirs if os.path.isdir(d)]
    if len(dirs) == 0:
        print("No valid directory found!")
        sys.exit(1)
    for d in dirs:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')
        if len(wavs) == 0:
            print("No wav file found in %s" % d)
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
                print("wav %s has been enrolled" % wav)
            except Exception as e:
                print(wav + " error %s" % e)
    m.train()
    m.dump(output_model)

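# Usage sketch for task_enroll above (hypothetical paths): each subdirectory of
# ./Train holds one speaker's wav files, and the directory basename becomes the
# enrollment label.
if __name__ == "__main__":
    task_enroll("./Train/*", "model.out")
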
def load_model(self, MainWindow):
    global m
    fileName = QtWidgets.QFileDialog().getOpenFileName(
        MainWindow, "Load Model", "", "Model File (*.out)")
    print(fileName[0])
    self.ln_model.setText(fileName[0])
    m = ModelInterface.load(fileName[0])

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    if os.path.exists(input_files):
        for f in glob.glob(os.path.expanduser(input_files)):
            fs, signal = read_wav(f)
            label, score = m.predict(fs, signal)
            filepath = ("http://sh.illegalfm.com:4881/record/"
                        + os.path.basename(input_files))
            with DB(host='47.92.33.19', user='******', passwd='1qazxsw2',
                    db='database_fm') as db:
                # db.execute("INSERT INTO database_fm (id,radio_file_path,sound_markup) VALUES (null,'{}','{}')".format(f, label))
                # NOTE: values are interpolated directly into the SQL string;
                # a parameterized query would be safer.
                db.execute(
                    "UPDATE fm_t_scan_record SET sound_markup = '{}' "
                    "WHERE radio_file_path = '{}'".format(label, filepath))
            print(filepath, '->', label, ", score->", score)
            os.remove(f)
    else:
        filepath = ("http://sh.illegalfm.com:4881/record/"
                    + os.path.basename(input_files))
        with DB(host='47.92.33.19', user='******', passwd='1qazxsw2',
                db='database_fm') as db:
            db.execute(
                "UPDATE fm_t_scan_record SET sound_markup = 'Exception' "
                "WHERE radio_file_path = '{}'".format(filepath))

def task_verify(wav_url, person_id):
    start_time = time.time()
    print('Start time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(model)
    if person_id not in m.features:
        return 'fail', 'current user not trained', ''
    # Download the voice file to be verified
    # (note: %M for minutes; the original "%H%I%S" repeated the hour)
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = verify_voice_dir + current_time + '_' + person_id + '.wav'
    utils.download_file(wav_url, dest_wav)
    result = ('success', '', 'no')
    for f in glob.glob(os.path.expanduser(dest_wav)):
        fs, signal = utils.read_wav(f)
        probability = m.verify(fs, signal, person_id)
        print(probability)
        # Empirical log-likelihood threshold for accepting the speaker
        if probability > -48:
            print(f, '-> match:', person_id)
            result = ('success', '', 'yes')
        else:
            print(f, '-> no match')
            result = ('success', '', 'no')
    end_time = time.time()
    print('End time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)))
    print('Elapsed:', end_time - start_time)
    return result

def build_model(model_info: TModelInfo, no_initialize: bool = False) -> ModelInterface:
    model_type, device, kwargs = model_info
    return ModelInterface(model_type, device, no_initialize=no_initialize, **kwargs)

def OrderEnroll():
    m = ModelInterface.load("model.out")
    fs, signal = read_wav("./GUI/TotalRecording/18082020202755.wav")
    m.enroll("18082020202755", fs, signal)
    m.train()
    m.CheckEnroll()
    m.dump("mo1.out")

# Commented-out driver code kept for reference:
# def task_predictgui(path, input_model):
#     m = ModelInterface.load(input_model)
#     f = glob.glob(path)
#     fs, signal = read_wav(f[0])
#     label, score = m.predict(fs, signal)
#     return label
#
# if __name__ == "__main__":
#     args = get_args()
#     task = args.task
#     if task == 'enroll':
#         task_enroll(args.input, args.model)
#     elif task == 'predict':
#         task_predict(args.input, args.model)
#
# task_predict("datatest/*.wav", "model1.out")
# task_enroll("./Train/*", "model.out")
# task_predict("./Test", "model.out")
# Predict_ByFile("./GUI/TotalRecording/18082020202755.wav",
#                "D:/doantotnghiep/Speaker_recognition/model.out")
# OrderEnroll()

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(input_files)
    label, score = m.predict(fs, signal)
    print("label", '->', label, ", score->", score)
    result = [label, score]
    return result

def task_predict(input_files, input_model): m = ModelInterface.load(input_model) for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label, score = m.predict(fs, signal) print(f, '->', label, ", score->", score) return label, score
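# Usage sketch (hypothetical paths): predict every wav matched by the glob
# against a previously dumped model.
if __name__ == "__main__":
    label, score = task_predict("./Test/*.wav", "model.out")
    print(label, score)
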
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # Append each prediction to the results file
        with open("Test_results.txt", "a") as g:
            print(f, '->', label, ", score->", score, file=g)

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # Return after the first matched file; renamed from `str`
        # to avoid shadowing the builtin
        result_str = "label = {l2} score = {l3}".format(l2=label, l3=score)
        return [result_str, label]

def task_check_status(person_id):
    m = ModelInterface.load(model)
    if person_id not in m.features:
        return 'success', '', 'no'
    else:
        return 'success', '', 'yes'

def load(self):
    fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
    if fname:
        try:
            self.backend = ModelInterface.load(fname)
        except Exception as e:
            self.warn(str(e))
        else:
            self.status("Loaded from file: " + fname)

def __init__(self):
    self.backend = ModelInterface.load(self.INPUT_MODEL)
    try:
        fs, signal = read_wav(self.BG)
        self.backend.init_noise(fs, signal)
    except Exception:
        print("file not found!")
    self.pub = rospy.Publisher('/speaker', String, queue_size=10)
    self.sub = rospy.Subscriber('/wav', numpy_msg(Floats), self.task_predict)

def evaluate_loss(
    model: ModelInterface,
    batch: Sequence[Any],
    label: torch.Tensor
) -> torch.Tensor:
    # criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 60.0]))
    criterion = torch.nn.CrossEntropyLoss()
    pred = model.forward(batch)
    loss = criterion(pred, label)
    return loss

def Predict_ByFile(file, input_model):
    print("start")
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(file)
    print(fs)
    print(signal)
    label, score = m.predict(fs, signal)
    # The ground-truth label is the name of the file's parent directory
    strPath = os.path.realpath(file)
    y_true = os.path.basename(os.path.dirname(strPath))
    print(label)
    print(score)
    return label

def feature_re_extract():
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # Construct the train set from the enrolled features
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    # Construct the autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={x: train_data}
    )

    # Train the autoencoder
    training_epochs = 100
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print('Training epoch %d, cost' % epoch, c1[-1])

    # Re-encode the features through the hidden layer and retrain the model
    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()
    m.train()
    m.dump('model/model_da.out')
    with open('model/da.out', 'wb') as f:  # pickle needs binary mode
        pickle.dump(da, f, -1)

def train(train_data_dir, model_path):
    m = ModelInterface()
    files = [f for f in os.listdir(train_data_dir) if re.search(r"\.wav", f)]
    for f in files:
        # File names look like "<label>_<anything>.wav"; maxsplit=1 keeps this
        # from failing when the remainder contains further underscores
        label, _ = f.split("_", 1)
        file = os.path.join(train_data_dir, f)
        try:
            fs, signal = read_wav(file)
            m.enroll(label, fs, signal)
            logger.info("wav %s has been enrolled" % file)
        except Exception as e:
            logger.info(file + " error %s" % e)
    m.train()
    m.dump(model_path)

def evaluate(eval_data_dir, model_path):
    m = ModelInterface.load(model_path)
    files = [f for f in os.listdir(eval_data_dir) if re.search(r"\.wav", f)]
    total, n_correct = 0, 0
    for f in files:
        total += 1
        label, _ = f.split("_", 1)
        file = os.path.join(eval_data_dir, f)
        fs, signal = read_wav(file)
        pred, _ = m.predict(fs, signal)
        logger.info("Input: {}, Output: {}".format(file, pred))
        if label == pred:
            n_correct += 1
    logger.info("Accuracy: {}".format(n_correct / total))

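# End-to-end sketch for the train/evaluate pair above, assuming hypothetical
# data/train and data/eval directories of "<label>_<take>.wav" files:
if __name__ == "__main__":
    train("data/train", "speaker_model.out")
    evaluate("data/eval", "speaker_model.out")
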
def task_enroll(input_dirs, output_model, features_save=None, group_person_num=None):
    m = ModelInterface()
    # Split the space-separated directory patterns into a list
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    # Expand each pattern and chain the matches into one iterator
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    # Keep only the paths that are actually directories
    dirs = [d for d in dirs if os.path.isdir(d)]
    for d in dirs:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')
        if len(wavs) == 0:
            print("No wav file found in %s" % d)
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
            except Exception as e:
                print(wav + " error %s" % e)
    print("The wav files have been enrolled")
    # If an MFCC feature dump path was given, save the features there
    if features_save:
        m.mfcc_dump(features_save)
        print("The features of this group's wav files have been pickle.dumped to %s"
              % features_save)
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped" % output_model)

def task_train_single(wav_url, person_id):
    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()
    if person_id in m.features:
        return 'fail', 'already exists'
    # Download the training voice file
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    # %M for minutes; the original "%H%I%S" repeated the hour
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'
    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)
    # Collect the downloaded training voice files
    wavs = glob.glob(dest_dir + '/*.wav')
    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'
    # Train on the wavs
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % wav)
        except Exception as e:
            print(wav + " error %s" % e)
    m.train_single(person_id)
    m.dump(model)
    return 'success', ''

def task_predict(input_files, input_model):
    # Split the space-separated model path patterns into a list
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand each pattern and chain the matched model files into one iterator
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every matched model file
    models = [ModelInterface.load(m) for m in models]
    # Accuracy counters
    right = 0
    wrong = 0
    num = 0
    # For each test file, extract features, match against every model,
    # and keep the top-scoring result
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        predict_result = []
        # Match the file against each model group and collect all scored results
        for model in models:
            results = model.predict(feat)
            for result in results:
                predict_result.append(result)
        # Sort the predictions by score, highest first
        predict_result = sorted(predict_result,
                                key=operator.itemgetter(1),
                                reverse=True)
        # Label format for the WeChat voice dataset
        label = os.path.basename(f).split('_')[0]
        # Label format for the AISHELL dataset:
        # label = os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        # Update the accuracy counters
        if label in predict:
            right += 1
            print('label:', label, ' predict:', predict, ' right')
        else:
            wrong += 1
            print('label:', label, ' predict:', predict, ' wrong')
        num += 1
    print('All:', num, ' right:', right, ' wrong:', wrong, ' acc:', right / num)

def task_predict():
    m = ModelInterface.load('data.bin')
    predict_sound_path = os.path.join(os.getcwd(), 'predictSounds')
    dirs = os.listdir(predict_sound_path)
    wavs = []
    if len(dirs) == 0:
        print('No wav files found')
    else:
        for d in dirs:
            ext = os.path.splitext(d)[-1].lower()
            if ext == '.wav':
                wavs.append(d)
    for w in wavs:
        sample_rate, signal = read_wav(os.path.join(predict_sound_path, w))
        label = os.path.splitext(w)[0]
        label2, score = m.predict(sample_rate, signal)
        print(label, '->', label2, '->', score)

def train():
    m = ModelInterface()
    train_dir = 'data/train/'
    train_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                   'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                   'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                   'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['a.wav', 'b.wav', 'c.wav', 'd.wav', 'e.wav', 'f.wav', 'g.wav']
    for c in train_class:
        for n in file_name:
            fs, signal = wavfile.read(train_dir + c + n)
            m.enroll(c, fs, signal)
    m.train()
    m.dump('model/model.out')

def task_predict(input_files, input_model):
    total = 0
    acc = 0
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        total += 1
        fs, signal = read_wav(f)
        label = m.predict(fs, signal)
        print(f, '->', label, end='')
        # The ground truth is the name of the file's parent directory
        if f.split('/')[-2] == label:
            print("√")
            acc += 1
        else:
            print('×')
    acc = acc * 1.0 / total
    print(acc)

def task_mfcc_train(input_files, output_model):
    # Merge all MFCC feature files into a single dict
    mfcc_dic_all = {}
    for file in glob.glob(os.path.expanduser(input_files)):
        with open(file, 'rb') as f:
            mfcc_dic = pickle.load(f)
        # Merge the dicts; entries already collected take precedence
        mfcc_dic_all = {**mfcc_dic, **mfcc_dic_all}
    # Train on the merged features and save the model file
    m = ModelInterface()
    m.features = mfcc_dic_all
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped" % output_model)

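# Workflow sketch (hypothetical paths): dump per-group MFCC features with
# task_enroll(..., features_save=...) as defined earlier, then train one
# combined model on all of the dumps.
if __name__ == "__main__":
    task_enroll("./group1/*", "group1.out", features_save="features_group1.pkl")
    task_enroll("./group2/*", "group2.out", features_save="features_group2.pkl")
    task_mfcc_train("features_*.pkl", "combined_model.out")
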
def train_step(
    model: ModelInterface,
    # `torch.optim.optimizer.Optimizer` is ghost.
    # WHY DOES MYPY NOT RECOGNIZE `torch.optim.Optimizer`?
    optimizer: 'torch.optim.optimizer.Optimizer',
    batch: Sequence[Any],
    label: torch.Tensor
) -> float:
    # criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 60.0]))
    criterion = torch.nn.CrossEntropyLoss()
    optimizer.zero_grad()
    pred = model.forward(batch)
    loss = criterion(pred, label)
    loss.backward()
    optimizer.step()
    return loss.item()

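# Minimal training-loop sketch built on train_step above. Assumptions:
# `batches` is any iterable of (batch, label) pairs, and this ModelInterface
# exposes its underlying torch parameters via model.parameters().
def train_epochs(model: ModelInterface, batches, num_epochs: int = 10) -> None:
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(num_epochs):
        total_loss = 0.0
        for batch, label in batches:
            total_loss += train_step(model, optimizer, batch, label)
        print('epoch %d, total loss %.4f' % (epoch, total_loss))
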
def task_predict(input_files, input_model):
    start_time = time.time()
    print('Start time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = utils.read_wav(f)
        label, probability = m.predict(fs, signal)
        # Empirical log-likelihood threshold for accepting the prediction
        if probability > -48:
            print(f, '->', label)
        else:
            print(f, '-> no speaker recognized')
    end_time = time.time()
    print('End time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)))
    print('Elapsed:', end_time - start_time)

def test(up_bound, lower_bound):
    m = ModelInterface.load('model/model_da.out')
    with open('model/da.out', 'rb') as f:  # pickle needs binary mode
        da = pickle.load(f)
    count = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            # Use only the first 80000 samples of each test utterance
            if predict(m, fs, signal[:80000], da, up_bound, lower_bound) == c:
                count += 1
    print('accuracy is:',
          (100.0 * count) / (len(test_class) * len(file_name)), '%')

def evaluate_model(
    model: ModelInterface,
    batch: List[object],
    label: List[int],
    show_stats: bool = False
) -> Tuple[float, float, List[int]]:
    with torch.no_grad():
        pred = model.predict(batch)
        pred_label = pred.argmax(dim=1)
        index = pred_label.to(torch.bool)
        if show_stats:
            stats = torch.cat([
                pred[index].to('cpu'),
                torch.tensor(label, dtype=torch.float).reshape(-1, 1)[index]
            ], dim=1)
            log.debug(stats)
        roc_auc, prc_auc = util.evaluate_auc(label, pred[:, 1])
        return roc_auc, prc_auc, pred_label.tolist()

def feature_re_extract():
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # Construct the train set from the enrolled features
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    # Rescale every dimension into [-1, 1]
    up_bound = []
    lower_bound = []
    for j in range(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j])
                                   / (up_bound[j] - lower_bound[j])) - 1

    # Construct and train the stacked autoencoder
    sda = mSdA(layers=[39, 100])
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500)  # use 500 as the batch size

    # Re-encode the features through the hidden layer and retrain the model
    for c in test_class:
        m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')

def task_predict(path, input_model):
    m = ModelInterface.load(input_model)
    files = []
    total, true = 0, 0  # renamed from `sum` to avoid shadowing the builtin
    # r = root, d = directories, f = files
    for r, d, f in os.walk(path):
        for file in f:
            if '.wav' in file:
                files.append(os.path.join(r, file))
    for f in files:
        total += 1
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # The ground truth is the name of the file's parent directory
        strPath = os.path.realpath(f)
        y_true = os.path.basename(os.path.dirname(strPath))
        if label == y_true:
            true += 1
        print(f, '->', label, ", score->", score)
    print('Correctly predicted files:', true)
    print('Total files:', total)
    print('accuracy:', true / total * 100, '%')

def test():
    m = ModelInterface.load('model/model_sda.out')
    sda = mSdA.load('model/sda.out')
    count = 0
    allsum = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            # Score each 40000-sample chunk separately
            signal_size = 40000
            for indx in range(len(signal) // signal_size):
                allsum += 1
                if predict(m, fs,
                           signal[indx * signal_size:(indx + 1) * signal_size],
                           sda) == c:
                    count += 1
    print('accuracy is:', (100.0 * count) / allsum, '%')

def task_realtime_predict(input_model):
    print('start')
    m = ModelInterface.load(input_model)
    # Recording parameters
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    INTERVAL = 1   # seconds of new audio per prediction
    INITLEN = 2    # seconds of audio in the sliding window
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("* recording")
    frames = []
    # Fill the initial window (np.frombuffer replaces the deprecated np.fromstring)
    for i in range(0, int(RATE / CHUNK * INITLEN)):
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist()
        frames.append(data)
    while True:
        for i in range(0, int(RATE / CHUNK * INTERVAL)):
            # Append the newest time-window chunk...
            frames.append(
                np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist())
            # ...and drop the oldest one (pop(0) removes by position,
            # unlike remove(), which searches by value)
            frames.pop(0)
        framesjoin = utils.flat_array(frames)
        framesjoin = np.array(framesjoin)
        label, probability = m.predict(16000, framesjoin)
        print('current speaker ->', label)

class Main(QMainWindow):
    CONV_INTERVAL = 0.4
    CONV_DURATION = 1.5
    CONV_FILTER_DURATION = CONV_DURATION
    FS = 8000
    TEST_DURATION = 3

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        uic.loadUi("edytor.ui", self)
        self.statusBar()
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.timer_callback)

        self.noiseButton.clicked.connect(self.noise_clicked)
        self.recording_noise = False
        self.loadNoise.clicked.connect(self.load_noise)

        self.enrollRecord.clicked.connect(self.start_enroll_record)
        self.stopEnrollRecord.clicked.connect(self.stop_enroll_record)
        self.enrollFile.clicked.connect(self.enroll_file)
        self.enroll.clicked.connect(self.do_enroll)
        self.startTrain.clicked.connect(self.start_train)
        self.dumpBtn.clicked.connect(self.dump)
        self.loadBtn.clicked.connect(self.load)

        self.recoRecord.clicked.connect(self.start_reco_record)
        self.stopRecoRecord.clicked.connect(self.stop_reco_record)
        # self.newReco.clicked.connect(self.new_reco)
        self.recoFile.clicked.connect(self.reco_file)
        self.recoInputFiles.clicked.connect(self.reco_files)

        # UI init
        self.userdata = []
        self.loadUsers()
        self.Userchooser.currentIndexChanged.connect(self.showUserInfo)
        self.ClearInfo.clicked.connect(self.clearUserInfo)
        self.UpdateInfo.clicked.connect(self.updateUserInfo)
        self.UploadImage.clicked.connect(self.upload_avatar)

        # movie test
        self.movie = QMovie("image/recording.gif")
        self.movie.start()
        self.movie.stop()
        self.Animation.setMovie(self.movie)
        self.Animation_2.setMovie(self.movie)
        self.Animation_3.setMovie(self.movie)
        self.aladingpic = QPixmap("image/a_hello.png")
        self.Alading.setPixmap(self.aladingpic)
        self.Alading_conv.setPixmap(self.aladingpic)

        # default user image setting
        self.avatarname = "image/nouser.jpg"
        self.defaultimage = QPixmap(self.avatarname)
        self.Userimage.setPixmap(self.defaultimage)
        self.recoUserImage.setPixmap(self.defaultimage)
        self.convUserImage.setPixmap(self.defaultimage)
        self.load_avatar('avatar/')

        # Graph Window init
        self.graphwindow = GraphWindow()
        self.newname = ""
        self.lastname = ""
        self.Graph_button.clicked.connect(self.graphwindow.show)

        self.convRecord.clicked.connect(self.start_conv_record)
        self.convStop.clicked.connect(self.stop_conv)

        self.backend = ModelInterface()

        # debug
        QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug)

        # init: load previously recorded background noise, if present
        try:
            fs, signal = read_wav("bg.wav")
            self.backend.init_noise(fs, signal)
        except Exception:
            pass

    ############ RECORD
    def start_record(self):
        self.pyaudio = pyaudio.PyAudio()
        self.status("Recording...")
        self.movie.start()
        self.Alading.setPixmap(QPixmap("image/a_thinking.png"))
        self.recordData = []
        self.stream = self.pyaudio.open(format=FORMAT,
                                        channels=1,
                                        rate=Main.FS,
                                        input=True,
                                        frames_per_buffer=1)
        self.stopped = False
        self.reco_th = RecorderThread(self)
        self.reco_th.start()
        self.timer.start(1000)
        self.record_time = 0
        self.update_all_timer()

    def add_record_data(self, i):
        self.recordData.append(i)
        return self.stopped

    def timer_callback(self):
        self.record_time += 1
        self.status("Recording..." + time_str(self.record_time))
        self.update_all_timer()

    def stop_record(self):
        self.movie.stop()
        self.stopped = True
        self.reco_th.wait()
        self.timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()
        self.status("Record stopped")

    ############## conversation
    def start_conv_record(self):
        self.conv_result_list = []
        self.start_record()
        self.conv_now_pos = 0
        self.conv_timer = QTimer(self)
        self.conv_timer.timeout.connect(self.do_conversation)
        self.conv_timer.start(int(Main.CONV_INTERVAL * 1000))
        # reset
        self.graphwindow.wid.reset()

    def stop_conv(self):
        self.stop_record()
        self.conv_timer.stop()

    def do_conversation(self):
        interval_len = int(Main.CONV_INTERVAL * Main.FS)
        segment_len = int(Main.CONV_DURATION * Main.FS)
        self.conv_now_pos += interval_len
        to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]):
                                    self.conv_now_pos]
        signal = np.array(to_filter, dtype=NPDtype)
        label = None
        try:
            signal = self.backend.filter(Main.FS, signal)
            if len(signal) > 50:
                label = self.backend.predict(Main.FS, signal, True)
        except Exception as e:
            print(traceback.format_exc())
            print(str(e))

        global last_label_to_show
        label_to_show = label
        if label and self.conv_result_list:
            last_label = self.conv_result_list[-1]
            if last_label and last_label != label:
                label_to_show = last_label_to_show
        self.conv_result_list.append(label)
        print(label_to_show, "label to show")
        last_label_to_show = label_to_show

        # ADD FOR GRAPH
        if label_to_show is None:
            label_to_show = 'Nobody'
        if len(NAMELIST) and NAMELIST[-1] != label_to_show:
            NAMELIST.append(label_to_show)

        self.convUsername.setText(label_to_show)
        self.Alading_conv.setPixmap(QPixmap("image/a_result.png"))
        self.convUserImage.setPixmap(self.get_avatar(label_to_show))

    ###### RECOGNIZE
    def start_reco_record(self):
        self.Alading.setPixmap(QPixmap("image/a_hello.png"))
        self.recoRecordData = np.array((), dtype=NPDtype)
        self.start_record()

    def stop_reco_record(self):
        self.stop_record()
        signal = np.array(self.recordData, dtype=NPDtype)
        self.reco_remove_update(Main.FS, signal)

    def reco_do_predict(self, fs, signal):
        label = self.backend.predict(fs, signal)
        if not label:
            label = "Nobody"
        print(label)
        self.recoUsername.setText(label)
        self.Alading.setPixmap(QPixmap("image/a_result.png"))
        self.recoUserImage.setPixmap(self.get_avatar(label))

        # TODO To Delete
        write_wav('reco.wav', fs, signal)

    def reco_remove_update(self, fs, signal):
        new_signal = self.backend.filter(fs, signal)
        print("After removed: {0} -> {1}".format(len(signal), len(new_signal)))
        self.recoRecordData = np.concatenate((self.recoRecordData, new_signal))
        real_len = float(len(self.recoRecordData)) / Main.FS / Main.TEST_DURATION * 100
        if real_len > 100:
            real_len = 100
        self.reco_do_predict(fs, self.recoRecordData)

    def reco_file(self):
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "",
                                            "Files (*.wav)")
        print('reco_file')
        if not fname:
            return
        self.status(fname)
        fs, signal = read_wav(fname)
        self.reco_do_predict(fs, signal)

    def reco_files(self):
        fnames = QFileDialog.getOpenFileNames(self, "Select Wav Files", "",
                                              "Files (*.wav)")
        print('reco_files')
        for f in fnames:
            fs, sig = read_wav(f)
            newsig = self.backend.filter(fs, sig)
            label = self.backend.predict(fs, newsig)
            print(f, label)

    ########## ENROLL
    def start_enroll_record(self):
        self.enrollWav = None
        self.enrollFileName.setText("")
        self.start_record()

    def enroll_file(self):
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "",
                                            "Files (*.wav)")
        if not fname:
            return
        self.status(fname)
        self.enrollFileName.setText(fname)
        fs, signal = read_wav(fname)
        signal = monophonic(signal)
        self.enrollWav = (fs, signal)

    def stop_enroll_record(self):
        self.stop_record()
        print(self.recordData[:300])
        signal = np.array(self.recordData, dtype=NPDtype)
        self.enrollWav = (Main.FS, signal)

        # TODO To Delete
        write_wav('enroll.wav', *self.enrollWav)

    def do_enroll(self):
        name = self.Username.text().strip()
        if not name:
            self.warn("Please Input Your Name")
            return
        # self.addUserInfo()
        new_signal = self.backend.filter(*self.enrollWav)
        print("After removed: {0} -> {1}".format(len(self.enrollWav[1]),
                                                 len(new_signal)))
        print("Enroll: {:.4f} seconds".format(float(len(new_signal)) / Main.FS))
        if len(new_signal) == 0:
            print("Error! Input is silent! Please enroll again")
            return
        self.backend.enroll(name, Main.FS, new_signal)

    def start_train(self):
        self.status("Training...")
        self.backend.train()
        self.status("Training Done.")

    ####### UI related
    def getWidget(self, splash):
        t = QtCore.QElapsedTimer()
        t.start()
        while t.elapsed() < 800:
            msg = "times = " + str(t.elapsed())
            splash.showMessage(msg)
            QtCore.QCoreApplication.processEvents()

    def upload_avatar(self):
        fname = QFileDialog.getOpenFileName(self, "Open JPG File", "",
                                            "File (*.jpg)")
        if not fname:
            return
        self.avatarname = fname
        self.Userimage.setPixmap(QPixmap(fname))

    def loadUsers(self):
        with open("avatar/metainfo.txt") as db:
            for line in db:
                tmp = line.split()
                self.userdata.append(tmp)
                self.Userchooser.addItem(tmp[0])

    def showUserInfo(self):
        for user in self.userdata:
            if self.userdata.index(user) == self.Userchooser.currentIndex() - 1:
                self.Username.setText(user[0])
                self.Userage.setValue(int(user[1]))
                if user[2] == 'F':
                    self.Usersex.setCurrentIndex(1)
                else:
                    self.Usersex.setCurrentIndex(0)
                self.Userimage.setPixmap(self.get_avatar(user[0]))

    def updateUserInfo(self):
        userindex = self.Userchooser.currentIndex() - 1
        u = self.userdata[userindex]
        u[0] = self.Username.displayText()
        u[1] = self.Userage.value()
        if self.Usersex.currentIndex():
            u[2] = 'F'
        else:
            u[2] = 'M'
        with open("avatar/metainfo.txt", "w") as db:
            for user in self.userdata:
                for i in range(3):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def writeuserdata(self):
        with open("avatar/metainfo.txt", "w") as db:
            for user in self.userdata:
                for i in range(0, 4):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def clearUserInfo(self):
        self.Username.setText("")
        self.Userage.setValue(0)
        self.Usersex.setCurrentIndex(0)
        self.Userimage.setPixmap(self.defaultimage)

    def addUserInfo(self):
        for user in self.userdata:
            if user[0] == self.Username.displayText():
                return
        newuser = []
        newuser.append(self.Username.displayText())
        newuser.append(self.Userage.value())
        if self.Usersex.currentIndex():
            newuser.append('F')
        else:
            newuser.append('M')
        if self.avatarname:
            shutil.copy(self.avatarname, 'avatar/' + newuser[0] + '.jpg')
        self.userdata.append(newuser)
        self.writeuserdata()
        self.Userchooser.addItem(self.Username.displayText())

    ############# UTILS
    def warn(self, s):
        QMessageBox.warning(self, "Warning", s)

    def status(self, s=""):
        self.statusBar().showMessage(s)

    def update_all_timer(self):
        s = time_str(self.record_time)
        self.enrollTime.setText(s)
        self.recoTime.setText(s)
        self.convTime.setText(s)

    def dump(self):
        fname = QFileDialog.getSaveFileName(self, "Save Data to:", "", "")
        if fname:
            try:
                self.backend.dump(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Dumped to file: " + fname)

    def load(self):
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
        if fname:
            try:
                self.backend = ModelInterface.load(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Loaded from file: " + fname)

    def noise_clicked(self):
        self.recording_noise = not self.recording_noise
        if self.recording_noise:
            self.noiseButton.setText('Stop Recording Noise')
            self.start_record()
        else:
            self.noiseButton.setText('Recording Background Noise')
            self.stop_record()
            signal = np.array(self.recordData, dtype=NPDtype)
            wavfile.write("bg.wav", Main.FS, signal)
            self.backend.init_noise(Main.FS, signal)

    def load_noise(self):
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "",
                                            "Wav File (*.wav)")
        if fname:
            fs, signal = read_wav(fname)
            self.backend.init_noise(fs, signal)

    def load_avatar(self, dirname):
        self.avatars = {}
        for f in glob.glob(dirname + '/*.jpg'):
            name = os.path.basename(f).split('.')[0]
            print(f, name)
            self.avatars[name] = QPixmap(f)

    def get_avatar(self, username):
        p = self.avatars.get(str(username), None)
        if p:
            return p
        else:
            return self.defaultimage

    def printDebug(self):
        for name, feat in self.backend.features.items():
            print(name, len(feat))
        print("GMMs:", len(self.backend.gmmset.gmms))

def feature_re_extract():
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # Construct the train set from the enrolled features
    train_set = []
    up_bound = []
    lower_bound = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    '''
    # put all values into -1~1
    up_bound = []
    lower_bound = []
    for j in range(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j])
                                   / (up_bound[j] - lower_bound[j])) - 1
    '''

    # Construct the first autoencoder (39 -> 30)
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=30
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={x: train_data}
    )

    # Train the first autoencoder
    training_epochs = 20
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print('Training 1st ae epoch %d, cost' % epoch, c1[-1])

    # Train the second autoencoder (30 -> 20) on the first one's codes
    train_set2 = da.get_hidden_values(train_set).eval()
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da2 = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=30,
        n_hidden=20
    )
    cost, updates = da2.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da2 = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={x: train_data}
    )
    training_epochs = 20
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da2(numpy.asarray(train_set2)))
        print('Training 2nd ae epoch %d, cost' % epoch, c1[-1])

    # Re-encode the features through both hidden layers and retrain the model
    for c in test_class:
        m.features[c] = da2.get_hidden_values(
            da.get_hidden_values(m.features[c]).eval()).eval()
    m.train()
    m.dump('model/model_sda.out')
    with open('model/da1.out', 'wb') as f:  # pickle needs binary mode
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    # Note: these stay empty unless the commented-out rescaling block above
    # is re-enabled
    return up_bound, lower_bound