Python model Examples

Programming Language: Python

Namespace/Package Name: trf

Method/Function: model

Examples at hotexamples.com: 5

Python model - 5 examples found. These are the top-rated real-world Python examples of trf.model extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: multiple_run_trf.py Project: xqq2018rebuild/SPMILM

def main():
    """Train, rescore and evaluate several independent runs of a TRF model.

    The work to perform is selected by flags looked up in ``sys.argv``
    (several flags may be given at once):

    * ``-res``      add a ``runavg`` row (``mean+std`` over all runs) to the
                    result file, one value per column of ``fres.head``
    * ``-train``    prepare the data and train the model of each run
    * ``-plot``     call ``trf.PlotLog`` with the ``KN5`` row as baseline
    * ``-rescore``  rescore the n-best list with the model of each run
    * ``-wer``      tune the WER and store LL / PPL / WER in the result file
    * ``-stat``     compute the WER of every intermediate model of a run,
                    log it and plot it against the iteration number
    * ``-ais``      re-estimate the normalization constants with AIS, then
                    rescore and evaluate the resulting model
    * ``-all``      shorthand for ``-train``, ``-rescore`` and ``-wer``

    Relies on ``wb``, ``trf``, ``data``, ``np`` and ``plt`` being available
    at module level.
    """
    if len(sys.argv) == 1:
        print('\"python run.py -train\" train LSTM\n',
              '\"python run.py -rescore\" rescore nbest\n',
              '\"python run.py -wer\" compute WER'
              )

    run_times = range(0, 10)   # for multiple run

    bindir = '../../tools/trf/bin/'
    workdir = 'trflm/'
    fres = wb.FRes('models_ppl.txt')
    model = trf.model(bindir, workdir)

    class_num = 200
    train = workdir + 'train.id'
    valid = workdir + 'valid.id'
    test = workdir + 'test.id'
    vocab = workdir + 'vocab_c{}.list'.format(class_num)
    order = 4
    feat = 'g4_w_c_ws_cs_wsh_csh_tied.fs'
    #feat = 'g4_w_c_ws_cs_cpw.fs'
    maxlen = 0
    tmax = 20000
    t0 = 2000
    minibatch = 100
    gamma_lambda = '3000,0'
    gamma_zeta = '0,0.6'
    reg = 4e-5
    thread = 8

    if '-res' in sys.argv:
        # Average every result column (all but the first entry of the head)
        # over the runs and store it as "mean+std" in a "runavg" row.
        fres.Read()
        for i in range(1,len(fres.head)):
            value = []
            for runnum in run_times:
                write_name = 'trf_c{}_{}.run{}'.format(class_num, feat[0:-3], runnum)
                line = fres.Get(write_name)
                value.append(line[i])
            fres.Add('trf_c{}_{}.runavg'.format(class_num, feat[0:-3]), [fres.head[i]],
                     ['{:.2f}+{:.2f}'.format(np.mean(value), np.std(value))] )

    for runnum in run_times:
        # Common path prefix of all files written for this run
        # (feat[0:-3] drops the trailing '.fs').
        write_model = workdir + 'trf_c{}_{}.run{}'.format(class_num, feat[0:-3], runnum)

        if '-train' in sys.argv or '-all' in sys.argv:
            config = '-vocab {} -train {} -valid {} -test {} '.format(vocab, train, valid, test)
            config += ' -order {} -feat {} '.format(order, feat)
            config += ' -len {} '.format(maxlen)
            config += ' -write {0}.model -log {0}.log '.format(write_model)
            config += ' -t0 {} -iter {}'.format(t0, tmax)
            config += ' -gamma-lambda {} -gamma-zeta {}'.format(gamma_lambda, gamma_zeta)
            config += ' -L2 {} '.format(reg)
            config += ' -mini-batch {} '.format(minibatch)
            config += ' -thread {} '.format(thread)
            config += ' -print-per-iter 10 '
            config += ' -write-at-iter [{}:1000:{}]'.format(tmax-5000, tmax)  # output the intermediate models
            model.prepare(data()[0], data()[1], data()[2], class_num)
            model.train(config)
        if '-plot' in sys.argv:
            baseline = fres.Get('KN5')
            trf.PlotLog([write_model], [baseline])
        if '-rescore' in sys.argv or '-all' in sys.argv:
            config = ' -vocab {} '.format(vocab)
            config += ' -read {}.model '.format(write_model)
            config += ' -nbest {} '.format(data()[3])
            config += ' -lmscore {0}.lmscore -lmscore-test-id {0}.test-id '.format(write_model)
            model.use(config)
        if '-wer' in sys.argv or '-all' in sys.argv:
            [read_nbest, read_templ, read_acscore, read_lmscore] = data()[3:7]
            # use the LM scores written by the '-rescore' step of this run
            read_lmscore = write_model + '.lmscore'

            [wer, lmscale, acscale] = wb.TuneWER(read_nbest, read_templ,
                                                 wb.LoadScore(read_lmscore),
                                                 wb.LoadScore(read_acscore), np.linspace(0.1,0.9,9))
            print('wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(wer, lmscale, acscale))

            # calculate the ppl on wsj test
            write_templ_id = workdir + os.path.split(read_templ)[1] + '.id'
            v = trf.ReadVocab(vocab)
            trf.NbestToID(read_templ, write_templ_id, v)
            config = ' -vocab {} '.format(vocab)
            config += ' -read {}.model '.format(write_model)
            config += ' -test {} '.format(write_templ_id)
            LL_templ = model.use(config)
            PPL_templ = wb.LL2PPL(-LL_templ, write_templ_id)
            LL = model.get_last_value(write_model + '.log')

            # output the result
            name = os.path.split(write_model)[1]
            fres.AddLL(name, LL, data()[0:3])
            fres.Add(name, ['LL-wsj', 'PPL-wsj'], [LL_templ, PPL_templ])
            fres.AddWER(name, wer)
        if '-stat' in sys.argv:
            # calculate the mean and var of wers of the intermediate models
            inte_wer = []
            inte_model = []

            # find model: regular files in the model directory whose name
            # starts with this run's prefix, ends in '.model' and whose
            # second-to-last dot-separated field starts with 'n'
            model_dir, model_base = os.path.split(write_model)
            for file_name in os.listdir(model_dir):
                file_path = model_dir + os.sep + file_name
                if not os.path.isfile(file_path):
                    continue
                fields = file_path.split('.')
                # startswith() instead of [0] so that an empty field
                # (e.g. 'x..model') is skipped rather than raising IndexError
                if file_name.startswith(model_base) and \
                    fields[-1] == 'model' and \
                    fields[-2].startswith('n'):
                    inte_model.append(file_path)

            # compute wer; the context manager closes the log even when
            # rescoring or WER tuning of one of the models fails
            with open(workdir + 'inte_model_wer.log', 'wt') as flog:
                for file_path in sorted(inte_model):
                    print(file_path)
                    # number following the leading 'n' of the field
                    t = int(file_path.split('.')[-2][1:])

                    # lmscore
                    write_lmscore = os.path.splitext(file_path)[0] + '.lmscore'
                    config = ' -vocab {} '.format(vocab)
                    config += ' -read {} '.format(file_path)
                    config += ' -nbest {} '.format(data()[3])
                    config += ' -lmscore {0} '.format(write_lmscore)
                    model.use(config, False)
                    # wer
                    [wer, lmscale, acscale] = wb.TuneWER(data()[3], data()[4],
                                                         wb.LoadScore(write_lmscore),
                                                         wb.LoadScore(data()[5]), np.linspace(0.1, 0.9, 9))
                    print('t={} wer={}'.format(t, wer))
                    flog.write('{} \t wer={}\n'.format(file_path, wer))
                    inte_wer.append([t, wer])

            # plot wer
            inte_wer = sorted(inte_wer, key=lambda d: d[0])
            t_list = [i[0] for i in inte_wer]
            wer_list = [i[1] for i in inte_wer]
            # statistics over (at most) the last 20 intermediate models
            wer_mean = np.mean(wer_list[-20:])
            wer_std = np.std(wer_list[-20:])
            print('wer_mean={}  wer_std={}'.format(wer_mean, wer_std))

            plt.figure()
            plt.plot(t_list, wer_list)
            plt.xlabel('t')
            plt.ylabel('wer')
            plt.show()
        if '-ais' in sys.argv:
            [read_nbest, read_templ, read_acscore] = data()[3:6]
            write_templ_id = workdir + os.path.split(read_templ)[1] + '.id'
            v = trf.ReadVocab(vocab)
            trf.NbestToID(read_templ, write_templ_id, v)

            # run AIS to calculate the normalization constants of models
            # (skipped when the AIS model file already exists)
            ais_chain = 10
            ais_inter = 10000
            ais_model = '{}.ais{}_{}.model'.format(write_model, ais_chain, ais_inter)
            if not os.path.exists(ais_model):
                config = ' -vocab {0} -read {1}.model -write {2}'.format(vocab, write_model, ais_model)
                config += ' -norm-method AIS -AIS-chain {} -AIS-inter {} -thread {} '.format(ais_chain, ais_inter, thread)
                config += ' -norm-len-max {} '.format(trf.FileMaxLen(read_nbest)-1)  # just compute the needed length
                model.use(config)

            # rescore and compute wer
            write_lmscore = os.path.splitext(ais_model)[0] + '.lmscore'
            config = ' -vocab {} -read {}'.format(vocab, ais_model)
            config += ' -nbest {} -test {} '.format(read_nbest, write_templ_id)  # calculate the ppl of test set
            config += ' -lmscore {} '.format(write_lmscore)
            LL_templ = model.use(config, False)
            PPL_templ = wb.LL2PPL(-LL_templ, write_templ_id)
            # NOTE(review): the score files are passed as paths here, whereas
            # the '-wer' branch passes wb.LoadScore(...) results - confirm
            # that wb.TuneWER accepts both.
            [wer, lmscale, acscale] = wb.TuneWER(read_nbest, read_templ,
                                                 write_lmscore, read_acscore, np.linspace(0.1, 0.9, 9))
            # calculate the LL of train/valid/test
            LL = [0]*3
            id_data = [train, valid, test]  # are id files
            for i in range(3):
                config = ' -vocab {} -read {} -test {} '.format(vocab, ais_model, id_data[i])
                LL[i] = model.use(config, False)

            # write to res file
            name = os.path.split(write_model)[1]+":AIS{}-{}".format(ais_chain, ais_inter)
            fres.AddLL(name, LL, id_data)
            fres.AddWER(name, wer)
            fres.Add(name, ['LL-wsj', 'PPL-wsj'], [LL_templ, PPL_templ])

Example #2

Show file

def main():
    """Run AIS on trained TRF models and analyse the resulting logZ estimates.

    The work to perform is selected by flags looked up in ``sys.argv``:

    * ``-wer``   add a ``runavg`` row (``mean+std`` over ``run_times``) of the
                 AIS results to the result file
    * ``-ais``   run AIS for the model of each run, then rescore the n-best
                 list and store LL / WER / PPL
    * ``-cmp``   compare the variance of the logZ of 10 AIS runs with the
                 variance of the AIS weights read from a log file
    * ``-cmp2``  plot the logZ of SAMS, of the 10 AIS runs, of their mean and
                 of the AIS model selected by ``ais_chain`` / ``ais_inter``
    * ``-cmp3``  write a model whose logZ is the mean over the 10 AIS runs
                 and evaluate its WER and PPL
    * ``-wer3``  fit a straight line to the AIS logZ ("smooth") and evaluate
                 the original and the smoothed model of each of 10 runs
    * ``-wer2``  replace the logZ by the mean of the AIS weights ("adjust")
                 and evaluate the adjusted model of each of 10 runs

    Relies on ``wb``, ``trf``, ``data``, ``mat_var``, ``revise_logz``,
    ``load_ais_weight``, ``np`` and ``plt`` being available at module level.
    Only the first 33 logZ entries are used throughout
    (presumably the largest sentence length of interest - TODO confirm).
    """
    if len(sys.argv) == 1:
        print('\"python run.py -train\" train LSTM\n',
              '\"python run.py -rescore\" rescore nbest\n',
              '\"python run.py -wer\" compute WER')

    run_times = range(0, 1)  # for multiple run

    bindir = '../../tools/trf/bin/'
    workdir = 'trflm/'
    fres = wb.FRes('models_ppl.txt')
    model = trf.model(bindir, workdir)

    class_num = 200
    train = workdir + 'train.id'
    valid = workdir + 'valid.id'
    test = workdir + 'test.id'
    vocab = workdir + 'vocab_c{}.list'.format(class_num)
    thread = 18

    ais_chain = 10
    ais_inter = 200000

    if '-wer' in sys.argv:
        # calculate mean of the WER of 10 TRFs after AIS
        # res_list[i] collects column i of the result row of every run
        res_list = []
        for runnum in run_times:
            name = 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run{}.ais{}_{}'.format(
                runnum, ais_chain, ais_inter)
            res = fres.Get(name)[1:]
            if run_times.index(runnum) == 0:
                # first run: create one empty list per result column
                res_list = [[] for i in range(len(res))]
            for i in range(len(res)):
                res_list[i].append(res[i])
        name = 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.runavg.ais{}_{}'.format(
            ais_chain, ais_inter)
        head = fres.GetHead()[1:]
        for i in range(len(head)):
            mean = np.mean(res_list[i])
            std = np.std(res_list[i])
            fres.Add(name, [head[i]], ['{:.2f}+{:.2f}'.format(mean, std)])

    if '-ais' in sys.argv:
        for runnum in run_times:
            write_model = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run{}'.format(
                runnum)

            [read_nbest, read_templ, read_acscore] = data()[3:6]
            write_templ_id = workdir + os.path.split(read_templ)[1] + '.id'
            v = trf.ReadVocab(vocab)
            trf.NbestToID(read_templ, write_templ_id, v)

            # run AIS to calculate the normalization constants of models
            # (skipped when the AIS model file already exists)
            ais_model = '{}.ais{}_{}'.format(write_model, ais_chain, ais_inter)
            if not os.path.exists(ais_model + '.model'):
                config = ' -vocab {0} -read {1}.model -write {2}.model -log {2}.log'.format(
                    vocab, write_model, ais_model)
                config += ' -norm-method AIS -AIS-chain {} -AIS-inter {} -thread {} '.format(
                    ais_chain, ais_inter, thread)
                config += ' -norm-len-max {} '.format(
                    trf.FileMaxLen(read_nbest) -
                    1)  # just compute the needed length
                model.use(config)

            # rescore and compute wer
            write_lmscore = ais_model + '.lmscore'
            config = ' -vocab {} -read {}.model'.format(vocab, ais_model)
            config += ' -nbest {} -test {} '.format(
                read_nbest, write_templ_id)  # calculate the ppl of test set
            config += ' -lmscore {} '.format(write_lmscore)
            LL_templ = model.use(config, False)
            PPL_templ = wb.LL2PPL(-LL_templ, write_templ_id)
            [wer, lmscale, acscale] = wb.TuneWER(read_nbest, read_templ,
                                                 write_lmscore, read_acscore,
                                                 np.linspace(0.1, 0.9, 9))
            # calculate the LL of train/valid/test
            LL = [0] * 3
            id_data = [train, valid, test]  # are id files
            for i in range(3):
                config = ' -vocab {} -read {}.model -test {} '.format(
                    vocab, ais_model, id_data[i])
                LL[i] = model.use(config, False)

            # write to res file
            name = os.path.split(ais_model)[-1]
            fres.AddLL(name, LL, id_data)
            fres.AddWER(name, wer)
            fres.Add(name, ['LL-wsj', 'PPL-wsj'], [LL_templ, PPL_templ])

    if '-cmp' in sys.argv:
        # compare the variance of exp(logz) with the variance of AIS weight
        # Load the logz of 10 independent runs
        multi_run = 10
        logzs = []
        for i in range(multi_run):
            logz = trf.LoadLogz(
                workdir +
                'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0.ais10_20000.run{}.model'
                .format(i))
            logzs.append(logz[0:33])
        # transpose: one row per length, one column per run
        mat_logzs = np.matrix(logzs).T

        # Load the weight of each length: every log line containing 'logw='
        # contributes one row made of the numbers following that token
        logws = []
        with open(workdir +
                  'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0.ais10_20000.log'
                  ) as f:
            for line in f:
                idx = line.find('logw=')
                if idx != -1:
                    a = line[idx:].split()[1:]
                    logws.append([float(i) for i in a])
        mat_logws = np.matrix(logws)

        w_var = mat_var(mat_logws)
        z_var = mat_var(mat_logzs)

        for i in range(len(w_var)):
            rate = np.exp(w_var[i] - z_var[i])
            print('len={} w_var={} z_var={} rate={}'.format(
                i + 1, w_var[i], z_var[i], rate))
    if '-cmp2' in sys.argv:
        # compare the logz of AIS and the SAMS
        write_model = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0'
        logz_sams = trf.LoadLogz(write_model + '.model')
        logz_ais = trf.LoadLogz('{}.ais{}_{}.model'.format(
            write_model, ais_chain, ais_inter))
        plt.figure()
        plt.plot(logz_sams[0:33], 'r-', label='sams')
        logz_ais10 = []
        for n in range(10):
            logz_ais10.append(
                trf.LoadLogz('{}.ais10_20000.run{}.model'.format(
                    write_model, n)))
            plt.plot(logz_ais10[-1][0:33], 'g--')
        # element-wise mean of the 10 AIS runs
        logz_ais_m = [0] * 33
        for i in range(33):
            for n in range(10):
                logz_ais_m[i] += logz_ais10[n][i]
            logz_ais_m[i] /= 10
        plt.plot(logz_ais_m[0:33], 'r--')
        plt.plot(logz_ais[0:33], 'b--', label='ais 10-200K')
        #plt.legend()
        plt.show()

    if '-cmp3' in sys.argv:
        trf_model = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0'
        # revise the logz of the trf model to the mean of results of 10 (10-20k) runs
        logz_sams = trf.LoadLogz(trf_model + '.model')
        logz_ais10 = []
        for n in range(10):
            logz_ais10.append(
                trf.LoadLogz('{}.ais10_20000.run{}.model'.format(trf_model,
                                                                 n)))
        # element-wise mean of the 10 AIS runs
        logz_ais_m = [0] * 33
        for i in range(33):
            for n in range(10):
                logz_ais_m[i] += logz_ais10[n][i]
            logz_ais_m[i] /= 10

        print(logz_ais_m)
        ais_model = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0.ais10_20000.runavg.model'
        print('write -> ' + ais_model)
        revise_logz(trf_model + '.model', ais_model, logz_ais_m)

        # compute WER
        print('computer WER')
        wer = model.wer(vocab, ais_model, data()[3], data()[4], data()[5])
        print('WER={}'.format(wer))

        # compute PPL
        print('computer PPL')
        # model.ppl(..., True) is unpacked as [ppl, LL] by every other call
        # in this function; unpack here too so that only the perplexity is
        # printed after 'PPL=' instead of the whole pair.
        [ppl, _] = model.ppl(vocab, ais_model, data()[4], True)
        print('PPL={}'.format(ppl))

        # plot the logzs
        plt.figure()
        for n in range(10):
            plt.plot(logz_ais10[n][0:33], 'g-')
        plt.plot(logz_ais_m[0:33], 'r', label='ais10-20K-mean')
        plt.plot(logz_sams[0:33], 'b', label='sams')
        plt.legend()
        plt.show()

    if '-wer3' in sys.argv:
        # smooth zeta
        wer_ais = []
        wer_smooth = []
        ppl_ais = []
        ppl_smooth = []
        ll_ais = []
        ll_smooth = []
        for n in range(10):
            ais_name = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run{}.ais10_20000'.format(
                n)
            print(ais_name)

            # least-squares straight line through the logZ of lengths 1..33,
            # written to '<ais_name>.smooth.model'
            logz_ais = trf.LoadLogz(ais_name + '.model')[0:33]
            z = np.polyfit(np.linspace(1, 33, 33), logz_ais, 1)
            logz_ais_smooth = z[0] * np.linspace(1, 33, 33) + z[1]
            revise_logz(ais_name + '.model', ais_name + '.smooth.model',
                        logz_ais_smooth.tolist())
            print(logz_ais)
            print(logz_ais_smooth.tolist())

            if n == 0:
                # for the first run only: plot the AIS weights of every
                # length together with the three logZ curves
                logz_sams = trf.LoadLogz(
                    workdir +
                    'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run{}.model'.format(n))
                logw = load_ais_weight(ais_name + '.log')
                plt.figure()
                for i in range(len(logw)):
                    plt.plot((i + 1) * np.ones(len(logw[i])), logw[i], 'k.')
                plt.plot(np.linspace(1, 33, 33),
                         logz_ais,
                         'r-',
                         label='standard AIS')
                plt.plot(np.linspace(1, 33, 33),
                         logz_ais_smooth,
                         'g-',
                         label='smoothed AIS')
                plt.plot(np.linspace(1, 33, 33),
                         logz_sams[0:33],
                         'b-',
                         label='SAMS')
                plt.legend()
                plt.xlim(1, 33)
                plt.xlabel('length')
                plt.ylabel('logZ')
                plt.show()

            # evaluate the standard AIS model
            wer = model.wer(vocab, ais_name + '.model',
                            data()[3],
                            data()[4],
                            data()[5])
            [ppl, LL] = model.ppl(vocab, ais_name + '.model', data()[4], True)
            wer_ais.append(wer)
            ppl_ais.append(ppl)
            ll_ais.append(LL)

            fres.Add(
                os.path.split(ais_name)[-1], ['WER', 'LL-wsj', 'PPL-wsj'],
                [wer, LL, ppl])

            # evaluate the smoothed model
            wer = model.wer(vocab, ais_name + '.smooth.model',
                            data()[3],
                            data()[4],
                            data()[5])
            [ppl, LL] = model.ppl(vocab, ais_name + '.smooth.model',
                                  data()[4], True)
            wer_smooth.append(wer)
            ppl_smooth.append(ppl)
            ll_smooth.append(LL)

            fres.Add(
                os.path.split(ais_name)[-1] + '.smooth',
                ['WER', 'LL-wsj', 'PPL-wsj'], [wer, LL, ppl])

        # mean+std over the 10 runs, standard AIS
        for label, d in zip(['WER', 'LL-wsj', 'PPL-wsj'],
                            [wer_ais, ll_ais, ppl_ais]):
            cur_mean = np.mean(d)
            cur_std = np.std(d)
            fres.Add('trf_c200_g4_w_c_ws_cs_wsh_csh_tied.runavg.ais10_20000',
                     [label], ['{:.2f}+{:.2f}'.format(cur_mean, cur_std)])

        # mean+std over the 10 runs, smoothed AIS
        for label, d in zip(['WER', 'LL-wsj', 'PPL-wsj'],
                            [wer_smooth, ll_smooth, ppl_smooth]):
            cur_mean = np.mean(d)
            cur_std = np.std(d)
            fres.Add(
                'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.runavg.ais10_20000.smooth',
                [label], ['{:.2f}+{:.2f}'.format(cur_mean, cur_std)])

    if '-wer2' in sys.argv:
        # perform adjust-AIS and  evaluate the WER and PPL

        results = []
        for n in range(10):
            ais_name = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run{}.ais10_20000'.format(
                n)
            print(ais_name)
            # adjusted logZ: mean of the AIS weights of each length
            logw = load_ais_weight(ais_name + '.log')
            logz = [np.mean(a) for a in logw]
            revise_logz(ais_name + '.model', ais_name + '.adjust.model', logz)
            print('  wer')
            wer = model.wer(vocab, ais_name + '.adjust.model',
                            data()[3],
                            data()[4],
                            data()[5])
            print('  ppl')
            [ppl, LL] = model.ppl(vocab, ais_name + '.adjust.model',
                                  data()[4], True)
            fres.Add(
                os.path.split(ais_name)[-1] + '.ad',
                ['WER', 'LL-wsj', 'PPL-wsj'], [wer, LL, ppl])
            results.append([wer, LL, ppl])

        # mean and std of WER / LL / PPL over the 10 runs
        res_mean = []
        res_std = []
        for i in range(3):
            a = [b[i] for b in results]
            res_mean.append(np.mean(a))
            res_std.append(np.std(a))
        fres.Add('trf_c200_g4_w_c_ws_cs_wsh_csh_tied.runavg.ais10_20000.ad', [
            'WER', 'LL-wsj', 'PPL-wsj'
        ], ['{:.2f}+{:.2f}'.format(res_mean[i], res_std[i]) for i in range(3)])

Example #3

Show file

File: run_trf_1.py Project: xqq2018rebuild/SPMILM

def main():
    """Cluster, train, rescore and score one TRF model per task size.

    Flags looked up in ``sys.argv``: ``-class`` (prepare the data only),
    ``-train``, ``-plot``, ``-rescore``, ``-wer`` and ``-all`` (shorthand
    for ``-train``, ``-rescore`` and ``-wer``).  Results are stored in
    ``result.txt`` under the row name ``<tsize>:<model name>``.

    Relies on ``wb``, ``trf``, ``data`` and ``np`` from module level.
    """
    argv = sys.argv
    if len(argv) == 1:
        print('\"python run.py -train\" train LSTM\n',
              '\"python run.py -rescore\" rescore nbest\n',
              '\"python run.py -wer\" compute WER')

    run_all = '-all' in argv

    for tsize in [1]:
        # --- directories and tool wrappers ---
        bindir = '../../tools/trf/bin/'
        tskdir = '{}/'.format(tsize)
        workdir = tskdir + 'trflm/'

        fres = wb.FRes('result.txt')
        model = trf.model(bindir, workdir)

        # --- data files ---
        class_num = 200
        train, valid, test = [workdir + part + '.id'
                              for part in ('train', 'valid', 'test')]
        vocab = workdir + 'vocab_c{}.list'.format(class_num)

        # --- training hyper-parameters ---
        order = 4
        feat = 'g4_w_c_ws_cs_wsh_csh_tied.fs'  # alternative: 'g4_w_c_ws_cs_cpw.fs'
        maxlen = 100
        tmax = 50000
        t0 = 2000
        minibatch = 100
        gamma_lambda = '1000,0'
        gamma_zeta = '0,0.6'
        reg = 1e-6
        thread = 8

        # path prefix of the model files and row name in the result file
        # (feat[:-3] drops the trailing '.fs')
        model_prefix = workdir + 'trf_c{}_{}'.format(class_num, feat[:-3])
        res_name = '{}:{}'.format(tsize, os.path.basename(model_prefix))

        def prepare_data():
            """Pass the first three data() entries of the task to the model."""
            raw_train = data(tskdir)[0]
            raw_valid = data(tskdir)[1]
            raw_test = data(tskdir)[2]
            model.prepare(raw_train, raw_valid, raw_test, class_num)

        if '-class' in argv:
            # just cluster for each tsks.
            prepare_data()

        if '-train' in argv or run_all:
            config = ''.join([
                '-vocab {} -train {} -valid {} -test {} '.format(
                    vocab, train, valid, test),
                ' -order {} -feat {} '.format(order, feat),
                ' -len {} '.format(maxlen),
                ' -write {0}.model -log {0}.log '.format(model_prefix),
                ' -t0 {} -iter {}'.format(t0, tmax),
                ' -gamma-lambda {} -gamma-zeta {}'.format(
                    gamma_lambda, gamma_zeta),
                ' -L2 {} '.format(reg),
                ' -mini-batch {} '.format(minibatch),
                ' -thread {} '.format(thread),
                ' -print-per-iter 10 -write-at-iter 10000:10000:{}'.format(
                    tmax),
            ])
            prepare_data()
            model.train(config)
            # output
            final_ll = model.get_last_value(model_prefix + '.log')
            fres.AddLL(res_name, final_ll, data(tskdir)[0:3])

        if '-plot' in argv:
            kn5_row = fres.Get('{}:KN5'.format(tsize))
            trf.PlotLog([model_prefix], [kn5_row])

        if '-rescore' in argv or run_all:
            config = ''.join([
                ' -vocab {} '.format(vocab),
                ' -read {}.model '.format(model_prefix),
                ' -nbest {} '.format(data(tskdir)[3]),
                ' -lmscore {0}.lmscore -lmscore-test-id {0}.test-id '.format(
                    model_prefix),
            ])
            model.use(config)

        if '-wer' in argv or run_all:
            # the fourth entry (LM score file of data()) is replaced by the
            # scores written by the '-rescore' step
            read_nbest, read_templ, read_acscore, _ = data(tskdir)[3:7]
            lmscore_file = model_prefix + '.lmscore'

            lm_scores = wb.LoadScore(lmscore_file)
            ac_scores = wb.LoadScore(read_acscore)
            wer, lmscale, acscale = wb.TuneWER(read_nbest, read_templ,
                                               lm_scores, ac_scores,
                                               np.linspace(0.1, 0.9, 9))
            print('wer={:.4f} lmscale={:.2f} acscale={:.2f}'.format(
                wer, lmscale, acscale))

            # calculate the ppl on wsj test
            templ_id_file = workdir + os.path.basename(read_templ) + '.id'
            word_ids = trf.ReadVocab(vocab)
            trf.NbestToID(read_templ, templ_id_file, word_ids)
            config = ''.join([
                ' -vocab {} '.format(vocab),
                ' -read {}.model '.format(model_prefix),
                ' -test {} '.format(templ_id_file),
            ])
            ll_templ = model.use(config)
            ppl_templ = wb.LL2PPL(-ll_templ, templ_id_file)

            # output the result
            fres.Add(res_name, ['LL-wsj', 'PPL-wsj'], [ll_templ, ppl_templ])
            fres.AddWER(res_name, wer)

Example #4

Show file

File: multiple_run_ais.py Project: xqq2018rebuild/SPMILM

def main():
    """Repeat AIS several times on one trained TRF model and evaluate each run.

    Flags looked up in ``sys.argv``:

    * ``-wer``  add an ``avg`` row (``mean+std`` over ``run_times``) of the
                per-run AIS results to the result file
    * ``-ais``  for every run: estimate the normalization constants with AIS
                (unless the model file already exists), rescore the n-best
                list and store LL / WER / PPL

    Relies on ``wb``, ``trf``, ``data`` and ``np`` from module level.
    """
    argv = sys.argv
    if len(argv) == 1:
        print('\"python run.py -train\" train LSTM\n',
              '\"python run.py -rescore\" rescore nbest\n',
              '\"python run.py -wer\" compute WER')

    run_times = range(3, 10)  # for multiple run

    bindir = '../../tools/trf/bin/'
    workdir = 'trflm/'
    fres = wb.FRes('models_ppl.txt')
    model = trf.model(bindir, workdir)

    class_num = 200
    train, valid, test = [workdir + part + '.id'
                          for part in ('train', 'valid', 'test')]
    vocab = workdir + 'vocab_c{}.list'.format(class_num)
    thread = 8

    ais_chain = 10
    ais_inter = 20000

    if '-wer' in argv:
        # columns[k] gathers result column k of every run
        columns = []
        for pos, runnum in enumerate(run_times):
            row_name = 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0.ais{}_{}.run{}'.format(
                ais_chain, ais_inter, runnum)
            row = fres.Get(row_name)[1:]
            if pos == 0:
                columns = [[] for _ in row]
            for k, cell in enumerate(row):
                columns[k].append(cell)

        avg_name = 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0.ais{}_{}.avg'.format(
            ais_chain, ais_inter)
        for k, label in enumerate(fres.GetHead()[1:]):
            col_mean = np.mean(columns[k])
            col_std = np.std(columns[k])
            fres.Add(avg_name, [label],
                     ['{:.2f}+{:.2f}'.format(col_mean, col_std)])

    if '-ais' in argv:
        # every run starts from the same trained model
        base_model = workdir + 'trf_c200_g4_w_c_ws_cs_wsh_csh_tied.run0'
        id_data = [train, valid, test]  # are id files

        for runnum in run_times:
            read_nbest, read_templ, read_acscore = data()[3:6]
            templ_id_file = workdir + os.path.basename(read_templ) + '.id'
            word_ids = trf.ReadVocab(vocab)
            trf.NbestToID(read_templ, templ_id_file, word_ids)

            # run AIS to calculate the normalization constants of models
            ais_model = '{}.ais{}_{}.run{}.model'.format(
                base_model, ais_chain, ais_inter, runnum)
            if not os.path.exists(ais_model):
                # just compute the needed length
                needed_len = trf.FileMaxLen(read_nbest) - 1
                config = ''.join([
                    ' -vocab {0} -read {1}.model -write {2}'.format(
                        vocab, base_model, ais_model),
                    ' -norm-method AIS -AIS-chain {} -AIS-inter {} -thread {} '.format(
                        ais_chain, ais_inter, thread),
                    ' -norm-len-max {} '.format(needed_len),
                ])
                model.use(config)

            # rescore and compute wer
            lmscore_file = os.path.splitext(ais_model)[0] + '.lmscore'
            config = ''.join([
                ' -vocab {} -read {}'.format(vocab, ais_model),
                # '-test' also calculates the ppl of the test set
                ' -nbest {} -test {} '.format(read_nbest, templ_id_file),
                ' -lmscore {} '.format(lmscore_file),
            ])
            ll_templ = model.use(config, False)
            ppl_templ = wb.LL2PPL(-ll_templ, templ_id_file)
            wer, _lmscale, _acscale = wb.TuneWER(read_nbest, read_templ,
                                                 lmscore_file, read_acscore,
                                                 np.linspace(0.1, 0.9, 9))

            # calculate the LL of train/valid/test
            ll_values = [
                model.use(' -vocab {} -read {} -test {} '.format(
                    vocab, ais_model, id_file), False)
                for id_file in id_data
            ]

            # write to res file (row name: model file name without extension)
            row_name = os.path.splitext(os.path.basename(ais_model))[0]
            fres.AddLL(row_name, ll_values, id_data)
            fres.AddWER(row_name, wer)
            fres.Add(row_name, ['LL-wsj', 'PPL-wsj'], [ll_templ, ppl_templ])

Example #5

Show file

def main():
    """Continue training a TRF model and evaluate it on several n-best tasks.

    Flags looked up in ``sys.argv``: ``-train``, ``-plot``, ``-rescore``,
    ``-wer`` and ``-all`` (shorthand for ``-train``, ``-rescore`` and
    ``-wer``).  Training starts from the model whose path is the new model
    path without its trailing ``_2``.

    Relies on ``wb``, ``trf``, ``wer`` and ``process_nbest`` from module
    level.
    """
    argv = sys.argv
    if len(argv) == 1:
        print('\"python run.py -train\" train LSTM\n',
              '\"python run.py -rescore\" rescore nbest\n',
              '\"python run.py -wer\" compute WER')

    run_all = '-all' in argv

    bindir = '../../tools/trf/bin/'
    workdir = 'trflm/'
    fres = wb.FRes('models_ppl.txt')
    model = trf.model(bindir, workdir)

    nbest_root = 'data/nbest/'
    nbest_type_list = ['nbest_mvdr_single_heq_multi']

    # --- data files ---
    class_num = 200
    train, valid, test = [workdir + part + '.id'
                          for part in ('train', 'valid', 'test')]
    vocab = workdir + 'vocab_c{}.list'.format(class_num)

    # --- training hyper-parameters ---
    order = 4
    feat = 'g4_w_c_ws_cs_wsh_csh_tied.fs'  # alternative: 'g4_w_c_ws_cs_wsh_csh.fs'
    maxlen = 0
    tmax = 20000
    t0 = 0
    minibatch = 100
    gamma_lambda = '3000,0'
    gamma_zeta = '0,0.6'
    reg = 1e-6
    thread = 8

    # path prefix of the new model (feat[:-3] drops the trailing '.fs')
    model_prefix = workdir + 'trf_c{}_{}_2'.format(class_num, feat[:-3])
    model_base = os.path.basename(model_prefix)

    if '-train' in argv or run_all:
        config = ''.join([
            '-vocab {} -train {} -valid {} -test {} '.format(
                vocab, train, valid, test),
            # initial model: same prefix without the '_2' suffix
            ' -read {}.model'.format(model_prefix[:-2]),
            ' -order {} -feat {} '.format(order, feat),
            ' -len {} '.format(maxlen),
            ' -write {0}.model -log {0}.log '.format(model_prefix),
            ' -t0 {} -iter {}'.format(t0, tmax),
            ' -gamma-lambda {} -gamma-zeta {}'.format(
                gamma_lambda, gamma_zeta),
            ' -L2 {} '.format(reg),
            ' -mini-batch {} '.format(minibatch),
            ' -thread {} '.format(thread),
            ' -print-per-iter 10 ',
            # output the intermediate models
            ' -write-at-iter [{}:10000:{}]'.format(tmax - 30000, tmax),
        ])
        model.prepare('data/train', 'data/valid', 'data/valid', class_num)
        model.train(config)

    if '-plot' in argv:
        kn5_row = fres.Get('KN5')
        trf.PlotLog([model_prefix], [kn5_row])

    if '-rescore' in argv or run_all:
        for nbest_type in nbest_type_list:
            nbest_dir = nbest_root + nbest_type + '/'
            for subset in ['dt05', 'et05']:
                for cond in ['real', 'simu']:
                    tsk = 'nbestlist_{}_{}'.format(subset, cond)

                    out_dir = workdir + nbest_type + '/' + tsk + '/'
                    wb.mkdir(out_dir)
                    print('{} : {}'.format(nbest_type, tsk))
                    print('  write -> {}'.format(out_dir))
                    out_prefix = out_dir + model_base

                    # fill the empty lines
                    filled_nbest = out_prefix + '.nbest'
                    process_nbest(nbest_dir + tsk + '/words_text',
                                  filled_nbest)

                    config = ''.join([
                        ' -vocab {} '.format(vocab),
                        ' -read {}.model '.format(model_prefix),
                        ' -nbest {} '.format(filled_nbest),
                        ' -lmscore {0}.lmscore -lmscore-test-id {0}.test-id '.format(
                            out_prefix),
                    ])
                    model.use(config)

    if '-wer' in argv or run_all:
        for nbest_type in nbest_type_list:
            nbest_dir = nbest_root + nbest_type + '/'
            # score-file path of every LM type; the '<tsk>' placeholder is
            # left in the strings as-is for the wer module
            lmpaths = {
                'KN5': nbest_dir + '<tsk>/lmwt.lmonly',
                'RNN': nbest_dir + '<tsk>/lmwt.rnn',
                'LSTM': 'lstm/' + nbest_type + '/<tsk>/lmwt.lstm',
                'TRF': workdir + nbest_type + '/<tsk>/' + model_base + '.lmscore',
            }
            # other combinations tried: 'LSTM', 'RNN+KN5', 'LSTM+KN5', 'LSTM+TRF'
            lmtypes = ['TRF', 'RNN', 'KN5', 'RNN+TRF']
            wer_workdir = 'wer/' + nbest_type + '/'
            print('wer_workdir = ' + wer_workdir)
            wer.wer_all(wer_workdir, nbest_dir, lmpaths, lmtypes)
            tuned = wer.wer_tune(wer_workdir)
            wer.wer_print(wer_workdir, tuned)