Esempio n. 1
0
def _print_decoded(id_matrix, id2key):
    '''
    Print each row of a 2-D id matrix as one decoded text line.

    :param id_matrix: 2-D array-like exposing ``shape`` and ``[i][j]``
        indexing; every element must be convertible with ``int()``
    :param id2key: mapping from integer id to the string it stands for
    :return: None; one ``s: <text>`` line is printed per row
    '''
    n_rows, n_cols = id_matrix.shape[0], id_matrix.shape[1]
    for i in range(n_rows):
        pieces = []
        for j in range(n_cols):
            idx = int(id_matrix[i][j])
            # id 0 is rendered as a blank and never looked up in id2key
            # (presumably the padding id -- confirm against the index file).
            pieces.append(" " if idx == 0 else id2key[idx])
        print("s:", "".join(pieces))


def recover(result, label):
    '''
    Decode predicted and reference id matrices to text and print both.

    The id -> character mapping is loaded from ``../IndexFiles/en_char.csv``
    through ``index_utils.get_mapper``. Every row of ``result`` and then
    every row of ``label`` is printed as a string.

    :param result: 2-D array-like of ids (the values to decode)
    :param label: 2-D array-like of ids printed after ``result``
    :return: None; the decoded text is only printed
    :raises KeyError: if a non-zero id is missing from the mapping
    '''
    # Only the id -> key direction is needed for decoding.
    _, id2key = index_utils.get_mapper(
        index_file="../IndexFiles/en_char.csv")

    print("recoverd:\n")
    _print_decoded(result, id2key)

    print("label:\n")
    _print_decoded(label, id2key)
Esempio n. 2
0
def get_features(audioFile,
                 labelFile,
                 max_frame_size=MAX_FRAME_SIZE,
                 max_label_size=MAX_LABEL_SIZE):
    '''
    Extract zero-padded audio features and label ids for one utterance.

    :param audioFile: path of the audio file; loaded with librosa using
        ``sr=None``
    :param labelFile: path of the label file; the last space-separated
        token of every line is read character by character and each
        character is mapped to an id through ``key2id``
    :param max_frame_size: number of rows of the padded feature matrix
    :param max_label_size: length of the padded label id vector
    :return: list ``[mfcc_padded, frame_count, label_ids_padded,
        label_count]`` where the two counts are the lengths before padding
    :raises ValueError: if the features or the labels are longer than
        ``max_frame_size`` / ``max_label_size``
    :raises KeyError: if a label character is missing from the mapper
    :raises IndexError: if the label file yields no characters at all
    '''
    features = []
    key2id, _ = index_utils.get_mapper(
        index_file="../IndexFiles/en_char.csv")

    # Extract the audio features.
    audio, sample_rate = librosa.core.load(path=audioFile, sr=None)
    mfcc = dst.MFCC_Delta2(audio=audio, sample_rate=sample_rate)
    frame_count = mfcc.shape[0]
    if frame_count > max_frame_size:
        # Fail with a readable message instead of NumPy's broadcast error.
        raise ValueError(
            "{} has {} frames, more than max_frame_size={}".format(
                audioFile, frame_count, max_frame_size))

    # Zero-pad the feature matrix along the frame axis.
    mfcc_padded = np.zeros(shape=(max_frame_size, mfcc.shape[1]),
                           dtype=mfcc.dtype)
    mfcc_padded[:frame_count] = mfcc
    features.append(mfcc_padded)  # padded features
    features.append(frame_count)  # true number of frames

    # Read the label characters/phonemes and convert them to ids.
    # The context manager closes the file even when key2id lookup fails.
    index_list = []
    with open(file=labelFile) as label_in:
        for line in label_in:
            last_token = line.strip().split(sep=" ")[-1]
            index_list.extend(key2id[char] for char in last_token)

    label_count = len(index_list)
    if label_count > max_label_size:
        raise ValueError(
            "{} has {} labels, more than max_label_size={}".format(
                labelFile, label_count, max_label_size))

    # Zero-pad the id list; the dtype follows the type of the first id.
    index_list_padded = np.zeros(shape=(max_label_size, ),
                                 dtype=type(index_list[0]))
    index_list_padded[:label_count] = index_list
    features.append(index_list_padded)  # padded label ids
    features.append(label_count)  # true number of labels
    return features
Esempio n. 3
0
def getMaxSize(file_list):
    '''
    Find the largest feature length and label length over a set of files.

    For every entry the label is read from ``<entry>.WRD`` and the audio
    from ``<entry>.WAV``. The label length is the total number of
    characters in the last space-separated token of each line; the feature
    length is the first dimension of ``dst.MFCC_Delta2``'s output.

    Both maxima are printed and also returned, so callers can use them
    programmatically.

    :param file_list: iterable of file paths without extension
    :return: tuple ``(max_frame_size, max_label_size)``; ``(0, 0)`` when
        ``file_list`` is empty
    '''
    max_frame_size = 0
    max_label_size = 0
    for file in file_list:
        # Label length: summed instead of concatenating a throw-away string.
        with open(file=file + ".WRD", encoding="utf-8",
                  errors="ignore") as file_label:
            label_size = sum(
                len(line.strip().split(sep=" ")[-1]) for line in file_label)
        max_label_size = max(max_label_size, label_size)

        # Feature length: number of rows produced for this audio file.
        audio, rate = librosa.core.load(file + ".WAV", sr=None)
        features = dst.MFCC_Delta2(audio=audio, sample_rate=rate)
        max_frame_size = max(max_frame_size, features.shape[0])
    print("max_frame_size:", max_frame_size)
    print("max_label_size:", max_label_size)
    return max_frame_size, max_label_size