Example #1
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)

        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(x_set,
                              y_set,
                              batch_size=16,
                              n_mels=h,
                              feature_pad_len=w,
                              feature_dim=3,
                              cut_sub=32)

        model_helper = DCBNN2D(pymap)
        model_helper.compile(feature_shape=(w, h, 1),
                             label_max_string_length=32,
                             ms_output_size=1423)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader)
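A minimal sketch of how this `train` function might be called, borrowing the corpus wrappers and `config` paths that appear in the later examples; the choice of corpora here is an assumption, not part of the original snippet.

    # Hypothetical call: build a couple of dataset generators and hand them to train().
    thchs = Thchs30(config.thu_datapath)
    stcmd = ST_CMDS(config.stcmd_datapath)

    train([thchs, stcmd])  # fresh DCBNN2D training run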
Example #2
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200
        max_label_len = 64

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)
        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(
            x_set,
            y_set,
            batch_size=16,
            feature_pad_len=w,
            n_mels=h,
            max_label_len=max_label_len,
            pymap=pymap,
            melf=MelFeature5(),
            all_train=False,
        )

        model_helper = WAVEM(pymap)
        model_helper.compile(feature_shape=(w, h),
                             label_max_string_length=max_label_len,
                             ms_output_size=pymap.max_index + 1)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader, epoch=-1, save_step=100, use_ctc=True)
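The `load_model` argument resumes training from a saved checkpoint; a sketch assuming an `.h5` file under `config.model_dir`, as in the `err_count` example further down (the file name below is hypothetical).

    # Resume from a checkpoint; datagenes is a list of corpus wrappers,
    # e.g. [Thchs30(config.thu_datapath)].
    ckpt = os.path.join(config.model_dir, "WAVEM_step_100.h5")
    train(datagenes, load_model=ckpt)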
Example #3
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)

        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(
            x_set,
            y_set,
            batch_size=16,
            n_mels=h,
            feature_pad_len=w,
            feature_dim=3,
            pymap=pymap,
            melf=MelFeature5(),
            divide_feature_len=8,
        )

        model_helper = DCNN2D(pymap)
        model_helper.compile(feature_shape=(w, h, 1),
                             ms_output_size=pymap.max_index + 1)  # CTC loss requires: index < num_class - 1

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader, epoch=-1, use_ctc=True)
Example #4
    def train(datagenes: list, load_model=None, **kwargs):
        w, h = 1600, 200
        max_label_len = 64


        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)
        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(x_set, y_set,
                              batch_size=16,
                              feature_pad_len=w,
                              n_mels=h,
                              max_label_len=max_label_len,
                              pymap=pymap,
                              melf=MelFeature4(),
                              divide_feature_len=8,
                              all_train=False,
                              )

        model_helper = DCBNN1D(pymap)
        model_helper.compile(feature_shape=(w, h), label_max_string_length=max_label_len,
                             ms_output_size=pymap.max_index + 1)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)


        epoch = kwargs.get("epoch", -1)
        save_step = kwargs.get("save_step", 1000)

        model_helper.fit(vloader, epoch=epoch, save_step=save_step, use_ctc=True)
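Here `epoch` and `save_step` are forwarded through `**kwargs`; a sketch of a call that overrides both (the values are illustrative only).

    # epoch=-1 keeps training until interrupted; save_step controls how often
    # a checkpoint is written.
    train(datagenes, epoch=-1, save_step=500)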
Example #5
def err_count():
    '''
    Tally recognition errors over the audio datasets and write them to a report file.
    :return:
    '''
    z200 = Z200(config.z200_datapath)
    thchs = Thchs30(config.thu_datapath)
    prime = Primewords(config.prime_datapath)
    stcmd = ST_CMDS(config.stcmd_datapath)
    aishell = AiShell(config.aishell_datapath)

    lst = [z200, thchs, prime, stcmd, aishell]

    from acoustic.ABCDNN import DCBNN1D
    from util.reader import PinyinMapper, VoiceDatasetList, VoiceLoader
    from feature.mel_feature import MelFeature5

    w, h = 1600, 200
    max_label_len = 64
    pymap = PinyinMapper(sil_mode=-1)

    model_helper = DCBNN1D(pymap)
    model_helper.compile(feature_shape=(w, h), label_max_string_length=max_label_len,
                         ms_output_size=pymap.max_index + 1)

    model_helper.load(os.path.join(config.model_dir, "cur_best_DCBNN1D_epoch_722_step_722000.h5"))


    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(lst)

    vloader = VoiceLoader(x_set, y_set,
                          batch_size=16,
                          feature_pad_len=w,
                          n_mels=h,
                          max_label_len=max_label_len,
                          pymap=pymap,
                          melf=MelFeature5(),
                          divide_feature_len=8,
                          all_train=True,
                          )
    viter = vloader.create_iter(one_batch=True, return_word=True)

    with open("data_err.txt","w",encoding="utf-8") as w:
        for i,batch in enumerate(viter):
            [_, ys, _, label_len], words = batch
            py_true_b = pymap.batch_vector2pylist(ys,return_word_list=True,return_list=True)
            py_pred_b = model_helper.predict(batch)
            for py_true,py_pred,llen,word in zip(py_true_b,py_pred_b,label_len,words):
                llen = llen[0]
                # w.write(f"{pyt}")
                py_true = py_true[:llen]

                py_true = " ".join(py_true)
                py_pred = " ".join(py_pred)

                print(f"\r——{i*16}.",end="\0",flush=True)
                # print(word.strip(),py_pred)
                w.write(f"{word.strip()}\t{py_true}\t{py_pred}\n")
Example #6
    def train(datagene: list, load_model=None):
        dataset = VoiceDatasetList()
        _, y_set = dataset.merge_load(datagene, choose_x=False, choose_y=True)

        max_label_len = 64
        pinyin_map = PinyinMapper(sil_mode=0)
        chs_map = ChsMapper()
        tloader = TextLoader(y_set,
                             padding_length=max_label_len,
                             pinyin_map=pinyin_map,
                             cut_sub=16,
                             chs_map=chs_map)

        model_helper = DCNN1D()
        model_helper.compile(feature_shape=(max_label_len,
                                            tloader.max_py_size),
                             ms_input_size=pinyin_map.max_index,
                             ms_output_size=chs_map.categores)

        if load_model is not None:
            model_helper.load(load_model)

        model_helper.fit(tloader, -1)
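This variant trains the pinyin-to-character text model, so only transcripts are loaded (`choose_x=False, choose_y=True`); a hypothetical call, reusing a corpus wrapper from the other examples.

    # Text-model training only needs the label side of the corpora.
    train([Thchs30(config.thu_datapath)])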
Example #7
stcmd = ST_CMDS(config.stcmd_datapath)  # reportedly decent
# Too homogeneous, but easy to fit: good for sanity-checking a model.
# A model that cannot get decent results even on this data is not worth using.
thchs = Thchs30(config.thu_datapath)
prime = Primewords(config.prime_datapath)
aishell = AiShell(config.aishell_datapath)  # reportedly poor quality; not used for training
z200 = Z200(config.z200_datapath)

# datagenes = [thchs,stcmd,prime,aishell,z200]
datagenes = [thchs]
if __name__ == "__main__":
    w, h = 1600, 200
    max_label_len = 64
    batch_size = 16

    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(x_set,
                          y_set,
                          batch_size=16,
                          feature_pad_len=w,
                          n_mels=h,
                          max_label_len=max_label_len,
                          pymap=pymap,
                          divide_feature_len=8,
                          melf=MelFeature5(),
                          all_train=True)

    viter = vloader.create_iter(one_batch=True)
    for i, _ in enumerate(viter):
        pass  # consume the batches produced by the loader