def SpeechPostProcess(resultList, in_len):
    """Decode the network output into text using CTC decoding and the language model.

    NOTE: the original author marked this function as unfinished.

    Args:
        resultList: Sequence whose first element is a 4-D array of scores.
            The original author's debug notes give its shape as
            (200, 1, 1, 1424) -- TODO confirm against the caller.
        in_len: Input lengths, forwarded unchanged to ``K.ctc_decode``.

    Returns:
        The value returned by ``ModelLanguage.SpeechToText`` for the decoded
        pinyin sequence.
    """
    resultArray = resultList[0]

    # Reorder axes (a0, a1, a2, a3) -> (a1, a2, a0, a3), then take index 0 of
    # the leading axis. With the shape noted above this yields (1, 200, 1424).
    swapped = np.swapaxes(np.swapaxes(resultArray, 0, 2), 0, 1)
    confidenceArray = np.array(swapped[0])

    ret = K.ctc_decode(confidenceArray, in_len, greedy=True, beam_width=100, top_paths=1)
    # Only one path is requested (top_paths=1); evaluate it and keep the
    # first row of the result.
    ret1 = K.get_value(ret[0][0])[0]

    # Map each decoded index to its pinyin symbol.
    list_symbol_dic = GetSymbolList()
    r_str = [list_symbol_dic[i] for i in ret1]
    # Parenthesised call prints the same text on Python 2 and is valid on
    # Python 3 (the bare print statement was a SyntaxError there).
    print("拼音序列识别结果:" + str(r_str))

    # Convert the pinyin sequence to text with the language model.
    ml = ModelLanguage('language_model')
    ml.LoadModel()
    return ml.SpeechToText(r_str)
def SpeechPostProcess(resultLists):
    """
    Function description:
        Decode the network output into a pinyin sequence, convert it to text
        with the language model, and append both to
        ``results/asr_results.txt`` next to this file.
    Parameter:
        resultLists: network output; it is reshaped to (200, 1424), so it
            must contain exactly 200 * 1424 values.
    Return Value:
        txt, pinyin: the text returned by ``ModelLanguage.SpeechToText`` and
            the list of pinyin symbols it was produced from.
    """
    blank_index = 1423

    # Collapse the network output into a 2-D matrix.
    dets = np.reshape(resultLists, (200, 1424))

    # Convert the scores into a sequence of pinyin indices.
    _, ret1 = greedy_decode(dets)

    # Drop every blank from the sequence. The previous loop attempted only
    # ceil(len/2) removals and crashed with NameError (``except Except``) as
    # soon as no blank was left to remove.
    ret1 = [idx for idx in ret1 if idx != blank_index]

    # Map each index to its pinyin symbol.
    list_symbol_dic = GetSymbolList()
    r_str = [list_symbol_dic[idx] for idx in ret1]
    string_pinyin = str(r_str)

    current_paths = os.path.dirname(__file__)
    # Join the components properly: plain concatenation produced the root
    # path "/language_model" whenever dirname(__file__) was empty.
    ml = ModelLanguage(os.path.join(current_paths, "language_model"))
    ml.LoadModel()

    str_pinyin = r_str
    r = ml.SpeechToText(str_pinyin)

    # Append the recognition result as "<pinyin list without brackets>-<text>".
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(os.path.join(current_paths, 'results/asr_results.txt'), 'a+b') as f:
        data = string_pinyin[1:-1] + '-' + r + '\n'
        f.write(data.encode())

    return r, str_pinyin
def SpeechPostProcess(resultList, in_len):
    """Decode the network output to pinyin and text, and log the result to a file.

    The result is appended to ``results/asr_results.txt`` and the language
    model is loaded from ``language_model``; both paths are relative to the
    current working directory.

    Args:
        resultList: Network output; it is reshaped to (200, 1424), so it must
            contain exactly 200 * 1424 values.
        in_len: Unused by this implementation; kept so existing callers that
            pass it keep working.

    Returns:
        A tuple ``(text, pinyin)``: the value returned by
        ``ModelLanguage.SpeechToText`` and the list of pinyin symbols it was
        produced from.
    """
    blank_index = 1423

    # Collapse the network output into a 2-D matrix.
    dets = np.reshape(resultList, (200, 1424))

    # Convert the scores into a sequence of pinyin indices.
    _, ret1 = greedy_decode(dets)

    # Drop every blank from the sequence. The previous loop attempted only
    # ceil(len/2) removals and hid every error behind a bare ``except: pass``.
    ret1 = [idx for idx in ret1 if idx != blank_index]

    # Map each index to its pinyin symbol.
    list_symbol_dic = GetSymbolList()
    r_str = [list_symbol_dic[idx] for idx in ret1]
    string_pinyin = str(r_str)

    ml = ModelLanguage('language_model')
    ml.LoadModel()

    str_pinyin = r_str
    r = ml.SpeechToText(str_pinyin)

    # Append the recognition result as "<pinyin list without brackets>-<text>".
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open('results/asr_results.txt', 'a+b') as f:
        data = string_pinyin[1:-1] + '-' + r + '\n'
        f.write(data.encode())

    return r, str_pinyin
def SpeechPostProcess(resultList, in_len):
    """Turn the network output into text via CTC decoding and the language model.

    Args:
        resultList: Sequence whose first element is an array with at least
            three axes; the first three axes are reordered before decoding.
            NOTE(review): presumably shaped (frames, 1, 1, classes) --
            confirm against the caller.
        in_len: Input lengths, forwarded unchanged to ``K.ctc_decode``.

    Returns:
        The value returned by ``ModelLanguage.SpeechToText`` for the decoded
        pinyin sequence.
    """
    scores = resultList[0]

    # Two axis swaps reorder (a0, a1, a2, a3) to (a1, a2, a0, a3); indexing
    # [0] then drops the leading axis, leaving (a2, a0, a3).
    reordered = np.swapaxes(scores, 0, 2)
    reordered = np.swapaxes(reordered, 0, 1)
    decoder_input = np.array(reordered[0])

    ret = K.ctc_decode(decoder_input, in_len, greedy=True, beam_width=100, top_paths=1)
    # Only one path is requested (top_paths=1); evaluate it and keep the
    # first row of the result.
    decoded = K.get_value(ret[0][0])
    ret1 = decoded[0]

    # Map each decoded index to its pinyin symbol.
    list_symbol_dic = GetSymbolList()
    r_str = [list_symbol_dic[i] for i in ret1]
    # Parenthesised call prints the same text on Python 2 and is valid on
    # Python 3 (the bare print statement was a SyntaxError there).
    print("拼音序列识别结果:" + str(r_str))

    # Convert the pinyin sequence to text with the language model.
    ml = ModelLanguage('language_model')
    ml.LoadModel()
    str_pinyin = r_str
    r = ml.SpeechToText(str_pinyin)
    return r