Esempio n. 1
0
def evaulate_trf(model, vocab, read_model, tsize, fres):
    """Rescore the n-best list with a stored model and log WER, LL and PPL.

    Args:
        model: object exposing ``use(config)``, ``ppl(...)`` and ``workdir``.
        vocab: vocabulary path forwarded to ``-vocab`` and to ``model.ppl``.
        read_model: model path prefix; ``.model`` and ``.lmscore`` are
            appended to it.
        tsize: task size; selects the ``'<tsize>/'`` data directory and
            prefixes the result label.
        fres: result sink exposing ``Add`` and ``AddWER``.

    NOTE(review): entries 3..6 of ``data(task_dir)`` are treated as n-best
    list, transcript template, acoustic scores and LM scores, going by the
    original variable names -- confirm against ``data``.
    """
    result_label = '{}:'.format(int(tsize)) + os.path.split(read_model)[-1]
    task_dir = '{}/'.format(tsize)

    # Rescore: assemble the option string piece by piece, then run the model.
    options = [
        ' -vocab {} '.format(vocab),
        ' -read {}.model '.format(read_model),
        ' -nbest {} '.format(data(task_dir)[3]),
        ' -lmscore {0}.lmscore'.format(read_model),
    ]
    model.use(''.join(options))

    # WER: the LM score entry from data() is discarded in favour of the
    # file written by the rescoring step above.
    nbest_path, templ_path, acscore_path, _ = data(task_dir)[3:7]
    lmscore_path = read_model + '.lmscore'

    lm_scores = wb.LoadScore(lmscore_path)
    ac_scores = wb.LoadScore(acscore_path)
    scale_grid = np.linspace(0.1, 0.9, 9)
    wer, lmscale, acscale = wb.TuneWER(
        nbest_path, templ_path, lm_scores, ac_scores, scale_grid)
    summary = 'wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(
        wer, lmscale, acscale)
    print(summary)

    # PPL / LL on the transcript template after wb.file_rmlabel processing.
    stripped_templ = (
        model.workdir + os.path.split(templ_path)[-1] + '.rmlabel')
    wb.file_rmlabel(templ_path, stripped_templ)
    ppl_value = model.ppl(vocab, read_model + '.model', stripped_templ)
    ll_value = -wb.PPL2LL(ppl_value, stripped_templ)

    # Record the results under the '<tsize>:<model name>' label.
    fres.Add(result_label, ['LL-wsj', 'PPL-wsj'], [ll_value, ppl_value])
    fres.AddWER(result_label, wer)
Esempio n. 2
0
def main():
    """Run the selected n-gram LM stages for each task size in [1, 2, 4].

    Stages are chosen by command-line flags: ``-train``, ``-test``,
    ``-rescore``, ``-wer``, or ``-all`` for all four.  With no arguments the
    usage text is printed and the function returns without doing any work.

    Results are accumulated through ``wb.FRes('result.txt')`` under the
    label ``'<tsize>:KN<order>'``.

    NOTE(review): the layout of ``data(tskdir)`` is inferred from how it is
    indexed here (0-2: corpora passed to prepare/ppl, 3: n-best list,
    4: transcript template, 5: acoustic scores) -- confirm against ``data``.
    """
    print(sys.argv)
    if len(sys.argv) == 1:
        print('\"python run_ngram.py -train\" train \n',
              '\"python run_ngram.py -test\" compute PPL\n',
              '\"python run_ngram.py -rescore\" rescore nbest\n',
              '\"python run_ngram.py -wer\" compute WER\n',
              '\"python run_ngram.py -all\" run every step')
        # Nothing was requested: stop instead of setting up models for nothing.
        return

    bindir = '../../tools/srilm/'
    fres = wb.FRes('result.txt')  # the result file
    order_reg = [5]

    # Resolve the requested stages once; '-all' switches every stage on.
    run_all = '-all' in sys.argv
    do_train = run_all or '-train' in sys.argv
    do_test = run_all or '-test' in sys.argv
    do_rescore = run_all or '-rescore' in sys.argv
    do_wer = run_all or '-wer' in sys.argv

    for tsize in [1, 2, 4]:
        tskdir = '{}/'.format(tsize)
        workdir = tskdir + 'ngramlm/'
        model = ngram.model(bindir, workdir)
        # Fetch the task's file list once instead of once per use.
        files = data(tskdir)

        for position, order in enumerate(order_reg):
            write_model = workdir + '{}gram.lm'.format(order)
            write_name = '{}:KN{}'.format(tsize, order)
            # Drop the trailing '.lm' and attach the score-file suffix.
            lmscore_path = write_model[0:-3] + '.lmscore'

            print(write_model)

            if do_train:
                # Data preparation is only needed before the first order.
                if position == 0:
                    model.prepare(files[0], files[1], files[2])
                model.train(order, write_model)

            if do_test:
                corpora = files[0:3]
                ppl_values = [model.ppl(write_model, order, corpus)
                              for corpus in corpora]
                fres.AddPPL(write_name, ppl_values, corpora)

            if do_rescore:
                model.rescore(write_model, order, files[3], lmscore_path)

            if do_wer:
                nbest, templ = files[3:5]
                lmscore = wb.LoadScore(lmscore_path)
                acscore = wb.LoadScore(files[5])

                wer, lmscale, acscale = wb.TuneWER(
                    nbest, templ, lmscore, acscore,
                    np.linspace(0.1, 0.9, 9))
                print('wer={} lmscale={} acscale={}'.format(
                    wer, lmscale, acscale))
                fres.AddWER(write_name, wer)

                # PPL / LL on the template after wb.file_rmlabel processing.
                trans_txt = workdir + os.path.split(templ)[-1] + '.txt'
                wb.file_rmlabel(templ, trans_txt)
                ppl_templ = model.ppl(write_model, order, trans_txt)
                ll_templ = -wb.PPL2LL(ppl_templ, trans_txt)
                fres.Add(write_name, ['LL-wsj', 'PPL-wsj'],
                         [ll_templ, ppl_templ])
Esempio n. 3
0
def main():
    """Run the selected n-gram LM stages for orders 2 through 5.

    Stages are chosen by command-line flags: ``-train``, ``-rescore`` and
    ``-wer``.  With no arguments the usage text is printed and the function
    returns without creating the work directory or the model.

    All paths are made absolute from the current working directory.

    NOTE(review): the layout of ``data()`` is inferred from how it is
    indexed here (0-2: corpora passed to prepare, 3: n-best list,
    4: transcript template, 5: acoustic scores) -- confirm against ``data``.
    """
    print(sys.argv)
    if len(sys.argv) == 1:
        print('\"python run_ngram.py -train\" train \n',
              '\"python run_ngram.py -rescore\" rescore nbest\n',
              '\"python run_ngram.py -wer\" compute WER')
        # Nothing was requested: stop before touching the file system.
        return

    absdir = os.getcwd() + '/'
    bindir = absdir + '../../tools/srilm/'
    workdir = absdir + 'ngramlm/'
    wb.mkdir(workdir)

    datas = [absdir + i for i in data()]
    result_file = absdir + 'models_ppl.txt'  # the result file
    model = ngram.model(bindir, workdir)
    order_reg = [2, 3, 4, 5]

    do_train = '-train' in sys.argv
    do_rescore = '-rescore' in sys.argv
    do_wer = '-wer' in sys.argv

    for position, order in enumerate(order_reg):
        write_model = workdir + '{}gram.lm'.format(order)
        # Drop the trailing '.lm' and attach the score-file suffix.
        lmscore_path = write_model[0:-3] + '.lmscore'
        result_name = 'KN{}'.format(order)
        print(write_model)

        if do_train:
            # Data preparation is only needed before the first order.
            if position == 0:
                model.prepare(datas[0], datas[1], datas[2])
            model.train(order, write_model, result_file)

        if do_rescore:
            model.rescore(write_model, order, datas[3], lmscore_path)

        if do_wer:
            nbest, templ = datas[3:5]
            lmscore = wb.LoadScore(lmscore_path)
            acscore = wb.LoadScore(datas[5])

            wer, lmscale, acscale = wb.TuneWER(
                nbest, templ, lmscore, acscore, np.linspace(0.1, 0.9, 9))
            print('wer={} lmscale={} acscale={}'.format(wer, lmscale, acscale))
            # Built here rather than once up front: model.train is handed the
            # same file path, so the construction point is left unchanged.
            fres = wb.FRes(result_file)
            fres.AddWER(result_name, wer)

            # PPL / LL on the template after wb.file_rmlabel processing.
            trans_txt = workdir + os.path.split(templ)[-1] + '.txt'
            wb.file_rmlabel(templ, trans_txt)
            ppl_templ = model.ppl(write_model, order, trans_txt)
            ll_templ = -wb.PPL2LL(ppl_templ, trans_txt)
            fres.Add(result_name, ['LL-wsj', 'PPL-wsj'],
                     [ll_templ, ppl_templ])
Esempio n. 4
0
                model.prepare(data(tskdir)[0], data(tskdir)[1], data(tskdir)[2], data(tskdir)[3])
                model.train(config)
            else:
                print('exist model: ' + write_model)

        if '-test' in sys.argv or '-all' in sys.argv:
            PPL = [0]*3
            PPL[0] = model.ppl(write_model, data(tskdir)[0])
            PPL[1] = model.ppl(write_model, data(tskdir)[1])
            PPL[2] = model.ppl(write_model, data(tskdir)[2])
            fres.AddPPL(write_name, PPL, data(tskdir)[0:3])

        if '-rescore' in sys.argv or '-all' in sys.argv:
            write_lmscore = write_model[0:-4] + '.lmscore'
            model.rescore(write_model, data(tskdir)[3], write_lmscore)

        if '-wer' in sys.argv or '-all' in sys.argv:
            [read_nbest, read_templ, read_acscore, read_lmscore] = data(tskdir)[3:7]
            read_lmscore = write_model[0:-4] + '.lmscore'

            [wer, lmscale, acscale] = wb.TuneWER(read_nbest, read_templ,
                                                 read_lmscore, read_acscore, np.linspace(0.1, 0.9, 9))
            print('wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(wer, lmscale, acscale))
            fres.AddWER(write_name, wer)

            write_templ_rm = workdir + os.path.split(read_templ)[1] + '.rmlabel'
            rnn.Nbest_rmlabel(read_templ, write_templ_rm)
            PPL_templ = model.ppl(write_model, write_templ_rm)
            LL_templ = -wb.PPL2LL(PPL_templ, write_templ_rm)
            fres.Add(write_name, ['LL-wsj', 'PPL-wsj'], [LL_templ, PPL_templ])