コード例 #1
0
def train_model(n_epochs, model, traindir, model_name, n_classes, totalsamples,
                dict_name, results_dir, batch_size=32, testing=False):
    """Fit ``model`` on batches drawn from ``traindir`` and record its history.

    Args:
        n_epochs: maximum number of epochs to train for.
        model: object exposing ``fit_generator`` (a compiled Keras model).
        traindir: directory handed to ``generator_train_flatbatch``.
        model_name: base name under which the results are recorded.
        n_classes: number of target classes, forwarded to the generator.
        totalsamples: number of generator steps per epoch.
        dict_name: name of the records file passed to ``cdnn_records_add``.
        results_dir: directory passed to ``cdnn_records_add``.
        batch_size: batch size forwarded to the generator.
        testing: when true, train for only 3 epochs of 10 steps each.

    Returns:
        ``(model, model_name)`` where ``model_name`` has ``_E<k>`` appended,
        ``k`` being the number of entries in the recorded loss history.

    Raises:
        KeyError: if the history holds neither an ``'acc'`` nor an
            ``'accuracy'`` entry.
    """
    print('...Training...')
    if testing:
        # Smoke-test mode: keep the run short.
        totalsamples = 10
        n_epochs = 3

    # NOTE(review): an earlier comment here said the batch size must be 1
    # because MFCC files vary in length; the code forwards the caller's
    # ``batch_size`` unchanged, so that remark no longer matches the code.
    gen = generator_train_flatbatch(train_dir=traindir,
                                    batch_size=batch_size,
                                    n_classes=n_classes)
    # Stop once the training loss has not improved for 3 consecutive epochs.
    earlystop = EarlyStopping(monitor='loss',
                              min_delta=0,
                              patience=3,
                              verbose=1)
    history = model.fit_generator(generator=gen,
                                  steps_per_epoch=totalsamples,
                                  epochs=n_epochs,
                                  verbose=1,
                                  callbacks=[earlystop])
    loss = history.history['loss']
    # The accuracy metric is reported under the name it was compiled with,
    # which may be 'acc' or 'accuracy'; accept either spelling.
    if 'acc' in history.history:
        acc = history.history['acc']
    else:
        acc = history.history['accuracy']
    print(f'Model fit history:{history.history}')
    print(f'Trained on {totalsamples} files for over {n_epochs} epochs.')
    print(f'Results directory: {results_dir}')
    # Tag the name with the length of the loss history (early stopping may
    # end the run before ``n_epochs``).
    model_name = model_name + f'_E{len(loss)}'
    cdnn_records_add(loss=loss,
                     accuracy=acc,
                     model_name=model_name,
                     nn_records_name=dict_name,
                     results_address=results_dir)
    del gen
    return model, model_name
コード例 #2
0
def evaluate_model(model,
                   testdir,
                   n_classes,
                   totalsamples,
                   model_name,
                   dict_name,
                   results_dir,
                   batch_size=32,
                   testing=False):
    """Score ``model`` on batches from ``testdir`` and record loss/accuracy.

    Args:
        model: object exposing ``evaluate_generator``; its result must unpack
            into exactly two values (loss, accuracy).
        testdir: directory handed to ``generator_train_flatbatch``.
        n_classes: number of target classes, forwarded to the generator.
        totalsamples: number of generator steps to evaluate.
        model_name: name under which the scores are recorded.
        dict_name: name of the records file passed to ``cdnn_records_add``.
        results_dir: directory passed to ``cdnn_records_add``.
        batch_size: batch size forwarded to the generator.
        testing: when true, evaluate on 100 steps regardless of
            ``totalsamples``.

    Returns:
        None. The scores are printed and handed to ``cdnn_records_add``.
    """
    print('...Evaluating...')
    n_steps = 100 if testing else totalsamples

    batches = generator_train_flatbatch(train_dir=testdir,
                                        batch_size=batch_size,
                                        n_classes=n_classes)
    scores = model.evaluate_generator(generator=batches, steps=n_steps)
    # Unpack before printing so a malformed result fails before any output.
    loss, acc = scores
    for line in (f'Model fit history:{scores}',
                 f'Trained on {n_steps} files.',
                 f'Results directory: {results_dir}'):
        print(line)
    record = dict(loss=loss,
                  accuracy=acc,
                  model_name=model_name,
                  nn_records_name=dict_name,
                  results_address=results_dir)
    cdnn_records_add(**record)
    del batches
コード例 #3
0
def main(testing=False):
    """Grid-search CNN+BLSTM phone classifiers and score them word by word.

    Steps performed:
      1. Derive the project paths from the current working directory (the
         part of the path before ``'GOP-LSTM'`` is used as the root).
      2. Build the train/test folders and the list of selected phones.
      3. For every (sequence size, conv-layer list, dropout) combination:
         build the model, train it, evaluate it, then walk the test generator
         word by word and compare the gold-standard phone sequence with the
         plain softmax decision and with the "forced" decision (argmax
         restricted to the phones of the word plus silence).
      4. Print edit-distance, frame and per-segment accuracy and store the
         per-segment percentages through ``cdnn_records_add``.

    Args:
        testing: when true, results go to ``'testing_records.pk'``, training
            and evaluation run in their shortened mode and only 30 words are
            scored. The flag is honoured as passed; it used to be overwritten
            with ``True`` inside the function.
    """

    def percent(part, whole):
        # Guard the final report against an empty run (nothing was scored).
        return part / whole * 100 if whole else 0.0

    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never defines ``w2pdict`` (and, when
        # TrainAll is true, neither ``N_classes`` nor ``selected_phones``),
        # all of which are required further down. It is unreachable while
        # FramelevelORword is hard-coded to False.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(bycount=ByCount,
                                                            holddir=Holddir,
                                                            train_corpus=Traindir,
                                                            test_corpus=Testdir,
                                                            overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(frame_length=Frame_length,
                                                                       frame_step=Frame_step,
                                                                       n_ceps=N_ceps,
                                                                       n_context=N_context,
                                                                       dbdir=Dbdir,
                                                                       datdir=Wavdir,
                                                                       holddir=Holddir,
                                                                       wordcount=wordcount,
                                                                       phoinfdir=PhoInfDir)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'ddcp_blstm_gridsearch_records_wl_{wordcount}.pk'
        print(f'Selected phones: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(15)]]
    DropoutList = [0.8]

    # Add one class for silence; '_' is appended exactly once so that its
    # index equals the last class index.
    N_classes += 1
    selected_phones.append('_')

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V3'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                print('...Evaluating...')
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)
                # Forced Accuracy
                print('...Predicting...')
                # Number of words to score; kept separate from ``totaltest``
                # so later grid iterations still see the real test size.
                n_words = 30 if testing else totaltest
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)

                # Word accuracy (by softmax & forced max) and max-segment
                # accuracy, both measured against the gold standard.
                diagnose = False
                s_correct = 0
                f_correct = 0
                total = 0
                s_IDS = 0
                f_IDS = 0
                maxsegtotal = 0
                s_seg = 0
                f_seg = 0
                print(f'Total Test size:{n_words}\n')
                x, y, file = next(gen)
                for _ in range(n_words):  # amount of words to be judged
                    cfile = file
                    print(file)
                    gwordphones = []  # gold standard word segments
                    swordphones = []  # softmax word segments
                    fwordphones = []  # forced word segments
                    fname = file.split('.')[0]
                    # Copy before adding silence so the list stored in
                    # w2pdict is not grown on every visit.
                    potphones = list(w2pdict[fname]) + ['_']
                    if diagnose:
                        print(f'Current file:{file}')
                        print(f'Word\'s phones{potphones}')
                    pind = [selected_phones.index(sp) for sp in potphones]
                    # Consume every consecutive batch that belongs to this
                    # file; the batch that ends the loop is the first batch
                    # of the next word and is handled by the next iteration.
                    while file == cfile:
                        predictions = model.predict(x=x)
                        gwordphones += [selected_phones[sp]
                                        for sp in np.argmax(y, axis=2)[0]]
                        swordphones += [selected_phones[sp]
                                        for sp in np.argmax(predictions, axis=2)[0]]
                        # Forced decision: argmax over the word's phones only.
                        fwordphones += [selected_phones[pind[sp]] for sp in
                                        np.argmax(predictions[:, :, pind][0], axis=1)]
                        x, y, file = next(gen)
                    # got word segs, process them
                    gseg = segmentphonelist(gwordphones)
                    sseg = segmentphonelist(swordphones)
                    fseg = segmentphonelist(fwordphones)
                    sLD = uttLD(gseg, sseg)
                    fLD = uttLD(gseg, fseg)
                    s_IDS += sLD
                    f_IDS += fLD
                    if diagnose:
                        print('\n')
                        print(gseg)
                        print(sseg)
                        print(fseg)
                        print('\n')
                        print(sLD)
                        print(fLD)
                        print('\n')
                    # accuracy
                    # assumes each segment is (phone, start, end) and that
                    # the last gold segment is the trailing silence — TODO
                    # confirm against segmentphonelist
                    startsil = gseg[-1][1]  # Index of Silence
                    goldphones = gwordphones[:startsil]
                    s_correct += segCorrect(goldphones, swordphones[:startsil])
                    f_correct += segCorrect(goldphones, fwordphones[:startsil])
                    total += len(goldphones)

                    # max-seg-score with known boundaries
                    # per word, then test set score
                    for seg in gseg[:-1]:  # last phone is silence '_'
                        maxsegtotal += 1
                        cphone = seg[0]
                        sboundedlist = swordphones[seg[1]:seg[2]]
                        fboundedlist = fwordphones[seg[1]:seg[2]]
                        # Most frequent predicted phone inside the gold
                        # segment boundaries.
                        smaxphone = max(sboundedlist, key=sboundedlist.count)
                        fmaxphone = max(fboundedlist, key=fboundedlist.count)
                        if smaxphone == cphone:
                            s_seg += 1
                        if fmaxphone == cphone:
                            f_seg += 1
                        if diagnose:
                            print(seg)
                            print(smaxphone, fmaxphone, cphone)

                # Report the edit distance accumulated over all words, not
                # just the value of the last word.
                sLDpercent = percent(s_IDS, total)
                fLDpercent = percent(f_IDS, total)
                print(f'Insertions, Deletions, Substitions (SM):{s_IDS} out of {total}: {sLDpercent}%')
                print(f'Insertions, Deletions, Substitions (FM):{f_IDS} out of {total}: {fLDpercent}%')
                Spercent = percent(s_correct, total)
                Fpercent = percent(f_correct, total)
                print('\n')
                print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                print(f'Forced: {f_correct} out of {total}, {Fpercent}%')

                Spercent = percent(s_seg, maxsegtotal)
                Fpercent = percent(f_seg, maxsegtotal)

                print(f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%')
                print(f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%')
                # The record's loss/accuracy slots carry the softmax and
                # forced per-segment percentages respectively.
                cdnn_records_add(loss=Spercent,
                                 accuracy=Fpercent,
                                 model_name=Model_name,
                                 nn_records_name=cdnn_dict_name,
                                 results_address=cdnn_address)
                del gen
                del model
                k.clear_session()
コード例 #4
0
def main(testing=False):
    """Grid-search CNN+BLSTM phone classifiers and score frame accuracy.

    Steps performed:
      1. Derive the project paths from the current working directory (the
         part of the path before ``'GOP-LSTM'`` is used as the root).
      2. Build the train/test folders and the list of selected phones.
      3. For every (sequence size, conv-layer list, dropout) combination:
         build the model, train it, evaluate it, then compare the gold
         labels of test sequences with the plain argmax prediction and with
         the "forced" prediction (argmax restricted to the phones of the
         word the sequence comes from).
      4. Store both percentages through ``cdnn_records_add`` and finally
         rank/print all records.

    Args:
        testing: when true, results go to ``'testing_records.pk'``, training
            and evaluation run in their shortened mode and only 30 test
            sequences are scored.
    """

    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    N_context = 2
    N_ceps = 26
    wordcount = 30
    ByCount = 4000

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never defines ``w2pdict`` (and, when
        # TrainAll is true, neither ``N_classes``, ``selected_phones`` nor
        # ``totalcount``), all of which are required further down. It is
        # unreachable while FramelevelORword is hard-coded to False.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        print(f'Selected phones: {selected_phones}')

        cdnn_dict_name = f'cp_blstm_gridsearch_records_wl_{wordcount}.pk'

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    # The original literal contained '\&', an invalid escape sequence that
    # printed a stray backslash; a plain ampersand is what is meant.
    print(f'Train count & test count: {totalcount}')
    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[64, 64, 64, 64], [32, 32, 32, 32]]
    DropoutList = [0.8]

    seq_sizelist = [16, 32]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = '_'.join(str(x) for x in cl)
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)
                print('...Predicting...')
                # Number of sequences to score; kept separate from
                # ``totaltest`` so later grid iterations still see the real
                # test size.
                n_scored = 30 if testing else totaltest
                gen = generator_train_bufferedseq_wfname(train_dir=Testdir,
                                                         batch_size=seq_size,
                                                         n_classes=N_classes)
                p_correct = 0
                f_correct = 0
                total = 0
                for _ in range(n_scored):
                    x, y, file = next(gen)
                    fname = file.split('.')[0]
                    potphones = w2pdict[fname]
                    # Class indices of the phones this word may contain.
                    pind = [selected_phones.index(sp) for sp in potphones]
                    # ``predict`` replaces the removed Sequential-only alias
                    # ``predict_proba``; both return the same array.
                    predictions = model.predict(x=x)
                    TrueY = [
                        selected_phones[sp] for sp in np.argmax(y, axis=2)[0]
                    ]
                    PredY = [
                        selected_phones[sp]
                        for sp in np.argmax(predictions, axis=2)[0]
                    ]
                    # Forced decision: argmax over the word's phones only.
                    ForcY = [
                        selected_phones[pind[sp]]
                        for sp in np.argmax(predictions[:, :, pind][0], axis=1)
                    ]
                    p_correct += sum(
                        gold == pred for gold, pred in zip(TrueY, PredY))
                    f_correct += sum(
                        gold == pred for gold, pred in zip(TrueY, ForcY))
                    total += len(TrueY)
                # Guard against an empty run (nothing was scored).
                p_percent = p_correct / total * 100 if total else 0.0
                f_percent = f_correct / total * 100 if total else 0.0
                # The record's loss/accuracy slots carry the plain and the
                # forced frame accuracy respectively.
                cdnn_records_add(loss=p_percent,
                                 accuracy=f_percent,
                                 model_name=Model_name,
                                 nn_records_name=cdnn_dict_name,
                                 results_address=cdnn_address)
                print(
                    f'Predicted correct:{p_correct} out of {total}, {p_percent}'
                )
                print(
                    f'Forced  correct:{f_correct} out of {total}, {f_percent}')

                # Optional dump of the last scored sequence.
                diagnosis = False
                if diagnosis:
                    print(potphones)
                    print(file)
                    print(
                        f'Goldstd:{[selected_phones[sp] for sp in np.argmax(y, axis=2)[0]]}'
                    )
                    print(
                        f'Max All:{[selected_phones[sp] for sp in np.argmax(predictions,axis=2)[0]]}'
                    )
                    print(
                        f'ForcedA:{[selected_phones[pind[sp]] for sp in np.argmax(predictions[:, :, pind][0], axis=1)]}'
                    )
                del gen
                del model
                k.clear_session()

    cdnn_records_rankNprint(nn_record_name=cdnn_dict_name,
                            results_address=cdnn_address)