def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    label, score = None, None  # stays None if the glob matches no files
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        print(f, '->', label, ", score->", score)
    return label, score

def task_verify(wav_url, person_id):
    start_time = time.time()
    print('Start time:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(model)
    if person_id not in m.features:
        return 'fail', 'current user not trained', ''
    # Download the speech file to verify.
    # "%H%M%S" (not "%I", the 12-hour clock) so the timestamp includes minutes.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = verify_voice_dir + current_time + '_' + person_id + '.wav'
    utils.download_file(wav_url, dest_wav)
    result = ('success', '', 'no')
    for f in glob.glob(os.path.expanduser(dest_wav)):
        fs, signal = utils.read_wav(f)
        probability = m.verify(fs, signal, person_id)
        print(probability)
        if probability > -48:
            print(f, '-> match succeeded:', person_id)
            result = ('success', '', 'yes')
        else:
            print(f, '-> no match')
            result = ('success', '', 'no')
        # The original returned here, which made the timing code below
        # unreachable; break instead so it runs.
        break
    end_time = time.time()
    print('End time:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)))
    print('Elapsed:', end_time - start_time)
    return result

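# A minimal driver sketch for task_verify. The URL and person id are
# hypothetical; it assumes the module-level `model` path and
# `verify_voice_dir` are configured as the function above expects.
if __name__ == '__main__':
    status, message, matched = task_verify('http://example.com/probe.wav', 'alice')
    print(status, message, matched)
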
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(input_files)
    label, score = m.predict(fs, signal)
    print("label", '->', label, ", score->", score)
    result = [label, score]
    return result

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    if os.path.exists(input_files):
        for f in glob.glob(os.path.expanduser(input_files)):
            fs, signal = read_wav(f)
            label, score = m.predict(fs, signal)
            filepath = "http://sh.illegalfm.com:4881/record/" + os.path.basename(input_files)
            # Credentials masked, matching the already-masked user field.
            with DB(host='47.92.33.19', user='******', passwd='******', db='database_fm') as db:
                # db.execute("INSERT INTO database_fm (id,radio_file_path,sound_markup) VALUES (null,'{}','{}')".format(f, label))
                db.execute(
                    "UPDATE fm_t_scan_record SET sound_markup = '{}' WHERE radio_file_path = '{}'"
                    .format(label, filepath))
            print(filepath, '->', label, ", score->", score)
            os.remove(f)
    else:
        filepath = "http://sh.illegalfm.com:4881/record/" + os.path.basename(input_files)
        with DB(host='47.92.33.19', user='******', passwd='******', db='database_fm') as db:
            db.execute(
                "UPDATE fm_t_scan_record SET sound_markup = 'Exception' WHERE radio_file_path = '{}'"
                .format(filepath))

def OrderEnroll():
    m = ModelInterface.load("model.out")
    fs, signal = read_wav("./GUI/TotalRecording/18082020202755.wav")
    m.enroll("18082020202755", fs, signal)
    m.train()
    m.CheckEnroll()
    m.dump("mo1.out")

# def task_predictgui(path, input_model):
#     m = ModelInterface.load(input_model)
#     f = glob.glob(path)
#     fs, signal = read_wav(f[0])
#     label, score = m.predict(fs, signal)
#     return label

# if __name__ == "__main__":
#     global args
#     args = get_args()
#
#     task = args.task
#
#     if task == 'enroll':
#         task_enroll(args.input, args.model)
#     elif task == 'predict':
#         task_predict(args.input, args.model)

# task_predict("datatest/*.wav", "model1.out")
# task_enroll("./Train/*", "model.out")
# task_predict("./Test", "model.out")
# Predict_ByFile("./GUI/TotalRecording/18082020202755.wav", "D:/doantotnghiep/Speaker_recognition/model.out")
# OrderEnroll()

def load_model(self, MainWindow):
    global m
    fileName = QtWidgets.QFileDialog().getOpenFileName(
        MainWindow, "Load Model", "", "Model File (*.out)")
    print(fileName[0])
    self.ln_model.setText(fileName[0])
    m = ModelInterface.load(fileName[0])

def task_check_status(person_id):
    m = ModelInterface.load(model)
    if person_id not in m.features:
        return 'success', '', 'no'
    else:
        return 'success', '', 'yes'

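# A minimal sketch of calling task_check_status (the person id is
# hypothetical; the module-level `model` path is assumed as above):
if __name__ == '__main__':
    status, message, enrolled = task_check_status('alice')
    print('enrolled:', enrolled)
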
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # Append each result to the log file; the context manager closes it.
        with open("Test_results.txt", "a") as g:
            print(f, '->', label, ", score->", score, file=g)

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # Renamed from `str`, which shadowed the built-in type.
        result_str = "label = {l2} score = {l3}".format(l2=label, l3=score)
        return [result_str, label]

def load(self):
    fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
    if fname:
        try:
            self.backend = ModelInterface.load(fname)
        except Exception as e:
            self.warn(str(e))
        else:
            self.status("Loaded from file: " + fname)

def __init__(self):
    self.backend = ModelInterface.load(self.INPUT_MODEL)
    try:
        fs, signal = read_wav(self.BG)
        self.backend.init_noise(fs, signal)
    except Exception:
        print("file not found!")
    self.pub = rospy.Publisher('/speaker', String, queue_size=10)
    self.sub = rospy.Subscriber('/wav', numpy_msg(Floats), self.task_predict)

def Predict_ByFile(file, input_model):
    print("start")
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(file)
    print(fs)
    print(signal)
    label, score = m.predict(fs, signal)
    strPath = os.path.realpath(file)
    # The expected label is the name of the file's parent directory.
    y_true = os.path.basename(os.path.dirname(strPath))
    print(label)
    print(score)
    return label

def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    # construct train set
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)
    # construct autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train autoencoder
    training_epochs = 100
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print('Training epoch %d, cost ' % epoch, c1[len(c1) - 1])
    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()
    m.train()
    m.dump('model/model_da.out')
    # Pickle in binary mode ('wb'), as protocol -1 writes binary data.
    with open('model/da.out', 'wb') as f:
        pickle.dump(da, f, -1)

def evaluate(eval_data_dir, model_path):
    m = ModelInterface.load(model_path)
    files = [f for f in os.listdir(eval_data_dir) if re.search(r"\.wav", f)]
    total, n_correct = 0, 0
    for f in files:
        total += 1
        label, _ = f.split("_")
        file = os.path.join(eval_data_dir, f)
        fs, signal = read_wav(file)
        pred, _ = m.predict(fs, signal)
        logger.info("Input: {}, Output: {}".format(file, pred))
        if label == pred:
            n_correct += 1
    logger.info("Accuracy: {}".format(n_correct / total))

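# A minimal driver sketch for evaluate(). The paths are hypothetical, and it
# assumes eval files are named "<label>_<anything>.wav" as parsed above.
import logging
logger = logging.getLogger(__name__)

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    evaluate('data/eval', 'model.out')
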
def task_predict(input_files, input_model):
    # Split the input string of model directories into a list of paths.
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand each directory's model files and chain them into one iterator.
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every model file into a list.
    models = [ModelInterface.load(m) for m in models]
    # Accuracy counters.
    right = 0
    wrong = 0
    num = 0
    # Extract features from each audio file and match it against every model
    # to get the top-scoring result.
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        predict_result = []
        # Score the file against each model group and collect all results.
        for model in models:
            #print(model)
            #m = ModelInterface.load(model)
            results = model.predict(feat)
            for result in results:
                predict_result.append(result)
        #print("predict_result:", predict_result)
        # Sort predictions by score, highest first.
        predict_result = sorted(predict_result, key=operator.itemgetter(1), reverse=True)
        #print("sort_predict_result:", predict_result)
        # Label format for the WeChat voice dataset.
        label = os.path.basename(f).split('_')[0]  #[6:11]
        #label = os.path.basename(f).split('(')[0]  #[6:11]
        # Label format for the AISHELL dataset:
        #label = os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        #print('Top:', predict_result[:10])
        # Track accuracy.
        if label in predict:
            right += 1
            print('label:', label, ' predict:', predict, ' right')
        else:
            wrong += 1
            print('label:', label, ' predict:', predict, ' wrong')
        num += 1
    print('All:', num, ' right:', right, ' wrong:', wrong, ' acc:', right / num)

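# `get_feature` is not defined in this snippet. A plausible sketch, assuming
# it wraps MFCC extraction with python_speech_features; the original repo may
# use different parameters or add deltas:
from python_speech_features import mfcc

def get_feature(fs, signal):
    # 13 MFCC coefficients per frame; parameters here are illustrative.
    return mfcc(signal, samplerate=fs, numcep=13)
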
def task_predict(input_files, input_model):
    start_time = time.time()
    print('Start time:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = utils.read_wav(f)
        label, probability = m.predict(fs, signal)
        #print(probability)
        if probability > -48:
            print(f, '->', label)
        else:
            print(f, '-> speaker not recognized')
    end_time = time.time()
    print('End time:', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)))
    print('Elapsed:', end_time - start_time)

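# The -48 cutoff above is an empirical log-likelihood threshold. A sketch of
# how such a threshold could be tuned from held-out genuine scores instead of
# being hard-coded (hypothetical helper, not part of the original code):
import numpy as np

def tune_threshold(genuine_scores, margin=3.0):
    # Accept anything within `margin` of the lowest genuine score.
    return float(np.min(genuine_scores)) - margin
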
def task_predict():
    m = ModelInterface.load('data.bin')
    predict_sound_path = os.path.join(os.getcwd(), 'predictSounds')
    dirs = os.listdir(predict_sound_path)
    wavs = []
    if len(dirs) == 0:
        print('No wav files found')
    else:
        for d in dirs:
            ext = os.path.splitext(d)[-1].lower()
            if ext == '.wav':
                wavs.append(d)
    for w in wavs:
        sample_rate, signal = read_wav(os.path.join(predict_sound_path, w))
        label = os.path.splitext(w)[0]
        label2, score = m.predict(sample_rate, signal)
        print(label, '->', label2, '->', score)

def task_predict(input_files, input_model):
    total = 0
    acc = 0
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        total += 1
        fs, signal = read_wav(f)
        label = m.predict(fs, signal)
        print(f, '->', label, end='')
        # The expected label is the parent directory name.
        if f.split('/')[-2] == label:
            print("√")
            acc += 1
        else:
            print('×')
    acc = acc * 1.0 / total
    print(acc)

def test(up_bound, lower_bound):
    m = ModelInterface.load('model/model_da.out')
    # Read in binary mode ('rb') to match the binary pickle written above.
    with open('model/da.out', 'rb') as f:
        da = pickle.load(f)
    count = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            if predict(m, fs, signal[:80000], da, up_bound, lower_bound) == c:
                count = count + 1
    print('accuracy is:', (100.0 * count) / (len(test_class) * len(file_name)), '%')

def feature_re_extract():
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    # construct train set
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)
    # scale all values into -1~1
    up_bound = []
    lower_bound = []
    for j in range(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j]) /
                                   (up_bound[j] - lower_bound[j])) - 1
    # construct stacked autoencoder
    sda = mSdA(layers=[39, 100])
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500)  # use 500 as the batch size
    for c in test_class:
        m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')

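# The nested-loop min-max scaling above can be written in a few NumPy lines;
# a sketch assuming train_set is a 2-D (n_frames, 39) array with a nonzero
# range in every column:
import numpy as np

def scale_to_unit_range(train_set):
    # Column-wise min-max scaling into [-1, 1].
    a = np.asarray(train_set, dtype=float)
    lower, upper = a.min(axis=0), a.max(axis=0)
    return 2 * (a - lower) / (upper - lower) - 1, upper, lower
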
def task_predict(path, input_model):
    m = ModelInterface.load(input_model)
    files = []
    total, true = 0, 0  # renamed from `sum`, which shadowed the built-in
    # r=root, d=directories, f=files
    for r, d, f in os.walk(path):
        for file in f:
            if '.wav' in file:
                files.append(os.path.join(r, file))
    for f in files:
        total += 1
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        strPath = os.path.realpath(f)
        y_true = os.path.basename(os.path.dirname(strPath))
        if label == y_true:
            true += 1
        print(f, '->', label, ", score->", score)
    print('Correctly predicted files:', true)
    print('Total files:', total)
    print('accuracy:', true / total * 100, '%')

def test():
    m = ModelInterface.load('model/model_sda.out')
    sda = mSdA.load('model/sda.out')
    count = 0
    allsum = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            signal_size = 40000
            # Integer division so the loop count is a whole number.
            for indx in range(len(signal) // signal_size):
                allsum = allsum + 1
                if predict(m, fs, signal[indx * signal_size:(indx + 1) * signal_size], sda) == c:
                    count = count + 1
    print('accuracy is:', (100.0 * count) / allsum, '%')

def task_train_single(wav_url, person_id):
    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()
    if person_id in m.features:
        return 'fail', 'already exists'
    # Download the training speech file.
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    # "%H%M%S" (not "%I", the 12-hour clock) so the timestamp includes minutes.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'
    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)
    # Collect the downloaded training files.
    wavs = glob.glob(dest_dir + '/*.wav')
    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'
    # Train on the wavs.
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % (wav))
        except Exception as e:
            print(wav + " error %s" % (e))
    m.train_single(person_id)
    m.dump(model)
    return 'success', ''

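# A minimal sketch of the enroll-then-verify flow built from the helpers
# above (URLs and the person id are hypothetical):
if __name__ == '__main__':
    status, msg = task_train_single('http://example.com/enroll.wav', 'alice')
    if status == 'success':
        print(task_verify('http://example.com/probe.wav', 'alice'))
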
def task_realtime_predict(input_model):
    print('start')
    m = ModelInterface.load(input_model)
    # set recording parameters
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    INTERVAL = 1
    INITLEN = 2
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("* recording")
    frames = []
    # fill the initial buffer
    for i in range(0, int(RATE / CHUNK * INITLEN)):
        # np.frombuffer replaces the deprecated np.fromstring
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist()
        frames.append(data)
    while True:
        for i in range(0, int(RATE / CHUNK * INTERVAL)):
            # append the newest window of samples
            frames.append(np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist())
            # drop the oldest window
            frames.pop(0)
        framesjoin = utils.flat_array(frames)
        framesjoin = np.array(framesjoin)
        label, probability = m.predict(16000, framesjoin)
        print('current speaker ->', label)

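# The loop above never releases the audio device. A sketch of how the tail of
# task_realtime_predict could be wrapped so Ctrl-C cleans up (it assumes the
# same local names `stream` and `p`):
#     try:
#         while True:
#             ...  # capture and predict as above
#     except KeyboardInterrupt:
#         pass
#     finally:
#         stream.stop_stream()
#         stream.close()
#         p.terminate()
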
                    required=True,
                    help="Path to model file")
parser.add_argument("--task",
                    required=True,
                    choices=["enroll", "predict"],
                    help='Task to do. Either "enroll" or "predict"')
args = parser.parse_args()

if args.task == "predict" and not os.path.isfile(args.model_path):
    raise ValueError("Please provide valid model path")

r = sr.Recognizer()
r.pause_threshold = 1.0

if args.task == "enroll":
    if os.path.isfile(args.model_path) and not args.overwrite_model:
        model = ModelInterface.load(args.model_path)
    else:
        model = ModelInterface()
    print("***** Enroll sound data for one speaker *****")
    name = input("Enter your name: ")
    name = name.strip()
    print(f"Hello {name}. Please input your voice {args.num_samples} times")
    with tempfile.TemporaryDirectory() as tempdir:
        i = 1
        while i <= args.num_samples:
            with sr.Microphone() as source:
                audio = r.listen(source)
            # Generate random filename
            filename = os.path.join(tempdir, name + "_" + str(uuid.uuid1()) + ".wav")

def task_predict(input_files, input_model, isDynamic):
    """
    Predict the speaker from the given file(s)

    Args:
        input_files (string): full path to the speaker file
        input_model (string): model trained to give the solution
    """
    # Load the model object and retrieve the number of speakers.
    m = ModelInterface.load(input_model)
    n_label = m.get_n_label()
    # Compute the threshold (dynamic or static).
    if isDynamic:
        dyn_thrsh = m.get_dyn_threshold()
    else:
        threshold = 1 / n_label
    # Create an Evaluation object to save the results.
    ev = Evaluation()
    # Start the prediction process.
    print(input_files)
    for f in glob.glob(os.path.expanduser(input_files)):
        try:
            start_time = time.time()
            fs, signal = read_wav(f)
            signal = signal / max(abs(signal))
            # Extract the features and predict the label with the highest
            # score among all enrolled speakers.
            label, score = m.predict(fs, VAD_process(signal))
        except Exception as e:
            print(f + ' error %s' % (e))
            continue  # skip this file; label and score are undefined here
        # Retrieve the expected label from the directory
        # (offline evaluation only, not real time).
        root = os.path.split(f)
        if input_files[-9:] == "*/*/*.wav":
            root = os.path.split(root[0])
        speaker = os.path.basename(root[0])
        # Recognition: if the score is above the threshold the label is
        # accepted, otherwise the speaker is not recognized.
        if isDynamic:
            threshold = dyn_thrsh[label]
        recog = (score > threshold)
        # recog = True
        if not recog:
            print(speaker, ' not recognized. ->', label, 'Score->', score)
        else:
            print(speaker, '->', label, ', score->', score)
        # Add the speaker and its results to the evaluation object.
        ev.new(speaker, label, recog)
    # Retrieve the database label used and print the accuracy.
    path = os.path.split(root[0])[0]
    DB_name = os.path.split(path)[0]
    DB_name = os.path.basename(os.path.split(DB_name)[1])
    print('Accuracy : ', ev.accuracy(), '\n')
    ev.save(os.path.basename(path), n_label, DB_name, (time.time() - start_time))

def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        print('You are', label, " GMM score of ", score)

)
for i in range(0, 3, 1):
    if not os.path.exists(input_model):
        input_model = input(
            "Model does not exist! " + str(3 - i) +
            " attempts left. Enter relative path of the model (ex.: ./model/Pers_DB.out) : "
        )
    else:
        break
if os.path.exists(input_model):
    # Load the model and start the while loop.
    print("Model found! Starting real-time recognition process")
    m = ModelInterface.load(input_model)
    speaker = input("Write the name of the speaker (for evaluation purposes) :")
    start_time = time.time()
    while tmp < 5:
        count += 1
        buffer.record(chunk_size=sampling_rate)  # 1 second of recording
        data = buffer.get_data()
        data = np.frombuffer(data, 'int16')
        # Predict every 3rd loop iteration: recording at a 16000 Hz sampling
        # rate, (1 * 3) sec as buffer size, converting data to int16.
        if count >= 3:

print(" + Predicting") input_model = "model.out" input_files = "./tmp.wav" # m = ModelInterface.load(input_model) m = model for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label = m.predict(fs, signal) # print max(label, key=operator.itemgetter(1)) if abs(label[0][1] - label[1][1]) > .0000002: speakers_detected = [x for x in label if x[1] >= -.1] if len(speakers_detected) > 0: print max(speakers_detected, key=operator.itemgetter(1)) else: print "___ not similar enough to know speaker"," Best guess:", max(label, key=operator.itemgetter(1)) else: print "... predictions too similar"," Best guess:", max(label, key=operator.itemgetter(1)) mods = ModelInterface() mods = mods.load("./model.out") see_model(mods) import operator # find_user() print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" for i in range(1,60): find_user(mods)
def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
                  'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
                  'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
                  'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    # construct train set
    train_set = []
    up_bound = []
    lower_bound = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)
    '''
    # put all values into -1~1
    up_bound = []
    lower_bound = []
    for j in range(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])
    for i in range(len(train_set)):
        for j in range(len(train_set[0])):
            train_set[i][j] = 2*((train_set[i][j]-lower_bound[j]) /
                                 (up_bound[j]-lower_bound[j]))-1
    '''
    # construct first autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=30
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train first autoencoder
    training_epochs = 20
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print('Training 1st ae epoch %d, cost ' % epoch, c1[len(c1) - 1])
    # train second autoencoder
    train_set2 = da.get_hidden_values(train_set).eval()
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da2 = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=30,
        n_hidden=20
    )
    cost, updates = da2.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da2 = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    training_epochs = 20
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da2(numpy.asarray(train_set2)))
        print('Training 2nd ae epoch %d, cost ' % epoch, c1[len(c1) - 1])
    for c in test_class:
        m.features[c] = da2.get_hidden_values(
            da.get_hidden_values(m.features[c]).eval()).eval()
    m.train()
    m.dump('model/model_sda.out')
    # Pickle in binary mode ('wb'), as protocol -1 writes binary data.
    with open('model/da1.out', 'wb') as f:
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    return up_bound, lower_bound

def record_predict(input_file, input_model):
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(input_file)
    label = m.predict(fs, signal)
    return label

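# A minimal sketch calling record_predict (the paths are hypothetical):
if __name__ == '__main__':
    print(record_predict('recording.wav', 'model.out'))
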
def task_predict(input_files, input_model):
    # Split the input string of model directories into a list of paths.
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand each directory's model files and chain them into one iterator.
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every model file (skgmm.GMMSet object) into a list.
    models = [ModelInterface.load(m) for m in models]
    if len(models) == 0:
        print("No model file found in %s" % input_model)
        sys.exit(1)
    # Accuracy counters.
    right = 0
    right1 = 0
    wrong = 0
    wrong1 = 0
    num = 0
    # Extract features from each audio file and match it against every model
    # to get the top results.
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        predict_result = []
        # Score the file against each model group and collect all results.
        for model in models:
            #print(model)
            #m = ModelInterface.load(model)
            results = model.predict_proba(feat)
            print(results)
            for result in results:
                predict_result.append(result)
        #print("predict_result:", predict_result)
        # Sort predictions by score, highest first.
        predict_result = sorted(predict_result, key=operator.itemgetter(1), reverse=True)
        #print("sort_predict_result:", predict_result)
        # Label format for the WeChat voice dataset.
        label = os.path.basename(f).split('_')[0]  #[6:11]
        #label = os.path.basename(f).split('(')[0]  #[6:11]
        # Label format for the AISHELL dataset:
        #label = os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        predict_score = predict_result[0][1]
        #print('Top:', predict_result[:10])
        # Track top-1 accuracy.
        if label in predict:
            right1 += 1
            print('label:', label, ' predict:', predict, ' score:', predict_score, ' top1 right')
        else:
            wrong1 += 1
            print('label:', label, ' predict:', predict, ' score:', predict_score, ' top1 wrong')
        # Track top-10 accuracy.
        predicts = []
        predict_scores = []
        for pre in predict_result[:10]:
            predicts.append(pre[0])
            predict_scores.append(pre[1])
        if label in predicts:
            right += 1
            print('label:', label, ' predicts:', predicts, ' scores:', predict_scores, ' top10 Right')
        else:
            wrong += 1
            print('label:', label, ' predicts:', predicts, ' scores:', predict_scores, ' top10 Wrong')
        num += 1
    print('top1:', num, ' right:', right1, ' wrong:', wrong1, ' top1 acc:', right1 / num)
    print('top10:', num, ' right:', right, ' wrong:', wrong, ' top10 acc:', right / num)

def task_predict(input_files, input_model):
    # Split the input string of model directories into a list of paths.
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand each directory's model files and chain them into one iterator.
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every model file (skgmm.GMMSet object) into a list.
    models = [ModelInterface.load(m) for m in models]
    if len(models) == 0:
        print("No model file found in %s" % input_model)
        sys.exit(1)
    # Accuracy counters.
    right = 0
    right1 = 0
    wrong = 0
    wrong1 = 0
    num = 0
    # Extract features from each audio file and match it against every model
    # to get the top results.
    for f in glob.glob(os.path.expanduser(input_files)):
        start_time = time.time()
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        #print("Get feature ", time.time() - start_time, " seconds")
        predict_result = []
        f_models = [(feat, m) for m in models]
        #print(models)
        # Score the file against each model group and collect all results.
        # for model in models:
        #     #start_time1 = time.time()
        #     # A model file is a tuple: (label, gmm)
        #     score = model[1].score(feat)
        #     label = model[0]
        #     result = (label, score)
        #     predict_result.append(result)
        #     #print("Get one score ", time.time() - start_time1, " seconds")
        pool = ThreadPool(2)
        predict_result = pool.map(get_score, f_models)
        pool.close()
        pool.join()
        #print("Get score ", time.time() - start_time, " seconds")
        proba = GMMSet.softmax([i[1] for i in predict_result])
        predict_result = [(predict_result[i][0], proba[i]) for i in range(len(proba))]
        #print("predict_result:", predict_result)
        # Sort predictions by score, highest first.
        predict_result = sorted(predict_result, key=operator.itemgetter(1), reverse=True)
        #print("sort_predict_result:", predict_result)
        # Label format for the WeChat voice dataset.
        label = os.path.basename(f).split('_')[0]  #[6:11]
        #label = os.path.basename(f).split('(')[0]  #[6:11]
        # Label format for the AISHELL dataset:
        #label = os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        predict_score = predict_result[0][1]
        print("Predict ", time.time() - start_time, " seconds")
        #print('Top:', predict_result[:10])
        # Track top-1 accuracy.
        if label in predict:
            right1 += 1
            print('label:', label, ' predict:', predict, ' score:', predict_score, ' top1 right')
        else:
            wrong1 += 1
            print('label:', label, ' predict:', predict, ' score:', predict_score, ' top1 wrong')
        # Track top-10 accuracy.
        predicts = []
        predict_scores = []
        for pre in predict_result[:10]:
            predicts.append(pre[0])
            predict_scores.append(pre[1])
        if label in predicts:
            right += 1
            print('label:', label, ' predicts:', predicts, ' scores:', predict_scores, ' top10 Right')
        else:
            wrong += 1
            print('label:', label, ' predicts:', predicts, ' scores:', predict_scores, ' top10 Wrong')
        num += 1
    print('top1:', num, ' right:', right1, ' wrong:', wrong1, ' top1 acc:', right1 / num)
    print('top10:', num, ' right:', right, ' wrong:', wrong, ' top10 acc:', right / num)

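# `get_score` is used by pool.map above but not defined in this snippet. A
# plausible sketch, assuming each model is a (label, gmm) tuple as the
# commented-out sequential loop suggests:
def get_score(f_model):
    feat, model = f_model
    label, gmm = model
    return (label, gmm.score(feat))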