def _print_decoded_rows(matrix, id2key):
    """Print every row of a 2-D id matrix as text, one ``s: ...`` line per row."""
    for row_idx in range(matrix.shape[0]):
        ids = (int(matrix[row_idx][col_idx])
               for col_idx in range(matrix.shape[1]))
        # Id 0 is rendered as a blank; every other id goes through id2key.
        text = "".join(" " if idx == 0 else id2key[idx] for idx in ids)
        print("s:", text)


def recover(result, label):
    """Print the text decoded from predicted ids and from reference label ids.

    Both arguments are indexed as ``x[i][j]`` over ``x.shape[0]`` rows and
    ``x.shape[1]`` columns. Each entry is converted with ``int()``; 0 is
    printed as a space and any other id is looked up in the ``id2key``
    mapping loaded from ``../IndexFiles/en_char.csv``.

    :param result: 2-D array-like of ids to decode (printed first).
    :param label: 2-D array-like of reference ids (printed second).
    :return: None; the decoded rows are only written to stdout.
    """
    _, id2key = index_utils.get_mapper(
        index_file="../IndexFiles/en_char.csv")

    print("recoverd:\n")
    _print_decoded_rows(result, id2key)

    print("label:\n")
    _print_decoded_rows(label, id2key)
def get_features(audioFile, labelFile, max_frame_size=MAX_FRAME_SIZE, max_label_size=MAX_LABEL_SIZE):
    """Extract zero-padded MFCC features and label ids for one utterance.

    :param audioFile: path of the audio file; loaded with librosa at its
        native sample rate (``sr=None``).
    :param labelFile: path of the label file. For every line, the last
        space-separated token is taken and each of its characters is mapped
        to an id through ``key2id``.
    :param max_frame_size: number of rows of the padded feature matrix.
    :param max_label_size: length of the padded label id vector.
    :return: list ``[mfcc_padded, frame_count, label_ids_padded, label_count]``
        where the counts are the lengths before padding.
    :raises ValueError: if the utterance has more frames than
        ``max_frame_size`` or more label characters than ``max_label_size``.
    """
    key2id, _ = index_utils.get_mapper(
        index_file="../IndexFiles/en_char.csv")

    # Audio features.
    audio, sample_rate = librosa.core.load(path=audioFile, sr=None)
    mfcc = dst.MFCC_Delta2(audio=audio, sample_rate=sample_rate)
    frame_count = mfcc.shape[0]
    if frame_count > max_frame_size:
        # Same exception type NumPy raised on the failed slice assignment,
        # but with a message that names the real problem.
        raise ValueError(
            "%s has %d frames, more than max_frame_size=%d"
            % (audioFile, frame_count, max_frame_size))

    # Pad the feature matrix with zero rows up to max_frame_size.
    mfcc_padded = np.zeros(shape=(max_frame_size, mfcc.shape[1]),
                           dtype=mfcc.dtype)
    mfcc_padded[:frame_count] = mfcc[:]

    # Label characters -> ids. The file is decoded the same way getMaxSize
    # decodes it, so the measured and the extracted label lengths agree, and
    # the context manager closes the handle even if a lookup fails.
    with open(file=labelFile, encoding="utf-8", errors="ignore") as label_in:
        index_list = [
            key2id[char]
            for line in label_in
            for char in line.strip().split(sep=" ")[-1]
        ]

    label_count = len(index_list)
    if label_count > max_label_size:
        raise ValueError(
            "%s has %d label characters, more than max_label_size=%d"
            % (labelFile, label_count, max_label_size))

    # Keep the id type of the mapper; an empty label has no element to take
    # the type from, so fall back to int (assumes ids are integers - confirm
    # against index_utils.get_mapper).
    id_dtype = type(index_list[0]) if index_list else int
    index_list_padded = np.zeros(shape=(max_label_size, ), dtype=id_dtype)
    if index_list:
        index_list_padded[:label_count] = index_list

    return [mfcc_padded, frame_count, index_list_padded, label_count]
def getMaxSize(file_list):
    """Report the largest feature length and label length over a file list.

    Helper for collecting dataset statistics. For every entry, ``<entry>.WRD``
    is read as the label file and ``<entry>.WAV`` as the audio file.

    The label length of a file is the total number of characters in the last
    space-separated token of each of its lines. The feature length is the
    first dimension of ``dst.MFCC_Delta2`` computed on the audio loaded at its
    native sample rate.

    :param file_list: iterable of path prefixes (paths without the
        ``.WRD`` / ``.WAV`` extension).
    :return: tuple ``(max_frame_size, max_label_size)``; both values are also
        printed. Both are 0 for an empty ``file_list``.
    """
    max_frame_size = 0
    max_label_size = 0

    for base_path in file_list:
        # Label side: only the length is needed, so count characters instead
        # of building the concatenated string.
        with open(file=base_path + ".WRD", encoding="utf-8",
                  errors="ignore") as file_label:
            label_size = sum(
                len(line.strip().split(sep=" ")[-1]) for line in file_label)
        if label_size > max_label_size:
            max_label_size = label_size

        # Audio side: number of feature frames of this utterance.
        audio, rate = librosa.core.load(base_path + ".WAV", sr=None)
        features = dst.MFCC_Delta2(audio=audio, sample_rate=rate)
        size = features.shape[0]
        if size > max_frame_size:
            max_frame_size = size

    print("max_frame_size:", max_frame_size)
    print("max_label_size:", max_label_size)
    return max_frame_size, max_label_size