Example 1
0
def main():
    """Build the PKU char/POS dataset, then train and WER-score 3/4/5-gram LMs."""
    dataset = seq.DataX(
        2,
        vocab_files=[info['pku_vocab_chr'], info['pku_vocab_pos']],
        train_list=info['pku_train'],
        valid_list=info['pku_valid'],
        test_list=info['pku_test'])
    dataset = dataset.create_data(0)
    cfg = ngramlm.Config(dataset)

    # Sweep n-gram orders; cutoff 0 at every order means no count pruning.
    for ngram_order in (3, 4, 5):
        cfg.order = ngram_order
        cfg.cutoff = [0] * ngram_order

        run_dir = wb.mkdir('ngramlm/' + get_name(cfg), is_recreate=False)
        # Redirect stdout so everything below is captured in the run log.
        sys.stdout = wb.std_log(run_dir + '/ngram.log')
        print(run_dir)

        model = ngramlm.Model(cfg, dataset, bindir, run_dir)

        # Train the count-based n-gram model.
        with wb.processing('train...'):
            model.train(write_to_res=('results.txt', get_name(cfg)))

        # Rescore the n-best lists and record the WER.
        nbest_eval_chr(model,
                       dataset,
                       run_dir,
                       res_file='results.txt',
                       res_name=get_name(cfg))
Example 2
0
def main():
    """Load HKUST character data, then train and WER-score 3/4/5-gram LMs.

    Fix: the original concatenated info['hkust_valid_chr'] twice and never
    loaded a test list; by the project's train/valid/test convention the third
    entry should be the test set.
    """
    # NOTE(review): 'hkust_test_chr' replaces the duplicated 'hkust_valid_chr'
    # — confirm this key exists in `info` before relying on the run.
    data = reader.Data().load_raw_data(file_list=info['hkust_train_chr'] +
                                       info['hkust_valid_chr'] +
                                       info['hkust_test_chr'],
                                       add_beg_token='<s>',
                                       add_end_token='</s>')
    config = ngramlm.Config(data)

    # Sweep n-gram orders; cutoff 0 at every order means no count pruning.
    order_reg = [3, 4, 5]
    for order in order_reg:
        config.order = order
        config.cutoff = [0] * order

        workdir = wb.mkdir('ngramlm/' + get_name(config), is_recreate=False)
        # Redirect stdout so everything below is captured in the run log.
        sys.stdout = wb.std_log(workdir + '/ngram.log')
        print(workdir)

        m = ngramlm.Model(config, data, bindir, workdir)

        # train
        with wb.processing('train...'):
            m.train(write_to_res=('results.txt', get_name(config)))

        # wer: rescore the n-best lists and record the result
        nbest_eval_chr(m,
                       data,
                       workdir,
                       res_file='results.txt',
                       res_name=get_name(config))
Example 3
0
 def perform(self, step, epoch):
     # Per-epoch hook: score the sampler's WER on the n-best lists, using the
     # negative sampler log-probability as the rescoring cost.
     # NOTE(review): this def is indented one space less than its body below
     # expects relative to the sibling method — looks like a paste/scrape
     # artifact; confirm the real indentation in the repository.
     nbest_eval_chr(self.sampler,
                    self.data,
                    self.logdir,
                    res_file=os.path.join(self.logdir,
                                          'sampler_wer_per_epoch.log'),
                    res_name='epoch%.2f' % epoch,
                    rescore_fun=lambda x: -self.sampler.get_log_probs(x))
    def perform(self, step, epoch):
        """Per-epoch hook: if the model owns a sampler, score its WER, then defer to the base hook."""

        # 'sampler' is an optional attribute on self.m — only evaluate when present.
        if 'sampler' in self.m.__dict__:
            # rescore_fun: negative sampler log-probability is the rescoring cost.
            nbest_eval_chr(self.m, self.m.data, self.m.logdir,
                           res_file=os.path.join(self.m.logdir, 'sampler_wer_per_epoch.log'),
                           res_name='epoch%.2f' % epoch,
                           rescore_fun=lambda x: -self.m.sampler.get_log_probs(x)
                           )

        super().perform(step, epoch)
Example 5
0
def main(_):
    """Train an LSTM LM on the HKUST character data, then rescore n-best lists (WER).

    Fix: the original concatenated info['hkust_valid_chr'] twice and never
    loaded the test set, yet data.datas[2] is written out as 'test.id' below —
    the third list must be the test data.
    """
    # NOTE(review): 'hkust_test_chr' replaces the duplicated 'hkust_valid_chr'
    # — confirm this key exists in `info` before relying on the run.
    # 'add_unknwon_token' spelling is kept: the keyword must match the reader API.
    data = reader.Data().load_raw_data(file_list=info['hkust_train_chr'] +
                                       info['hkust_valid_chr'] +
                                       info['hkust_test_chr'],
                                       add_beg_token=None,
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')

    config = small_config(data)
    # config = medium_config(data)
    # config = large_config(data)

    work_dir = './lstm/' + create_name(config)
    wb.prepare_log_dir(work_dir, 'lstm.log')
    config.print()

    # Dump the vocabulary and id-mapped splits for inspection/reproducibility.
    data.write_vocab(work_dir + '/vocab.txt')
    data.write_data(data.datas[0], work_dir + '/train.id')
    data.write_data(data.datas[1], work_dir + '/valid.id')
    data.write_data(data.datas[2], work_dir + '/test.id')

    write_model = os.path.join(work_dir, 'model.ckpt')

    # lm = lstmlm.FastLM(config, device_list=['/gpu:0', '/gpu:0'])
    lm = lstmlm.LM(config, data, device='/gpu:0')

    # Supervisor manages checkpoints/summaries under work_dir/logs (TF 1.x API).
    sv = tf.train.Supervisor(logdir=os.path.join(work_dir, 'logs'),
                             summary_op=None,
                             global_step=lm.global_step())
    sv.summary_writer.add_graph(
        tf.get_default_graph())  # write the graph to logs
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    # Allocate GPU memory on demand rather than grabbing it all up front.
    session_config.gpu_options.allow_growth = True
    with sv.managed_session(config=session_config) as session:

        lm.train(session,
                 data,
                 write_model=write_model,
                 write_to_res=('results.txt', create_name(config)),
                 is_shuffle=False)

        # Rescore the n-best lists with the trained LM and record the WER.
        print('compute the WER...')
        nbest_eval_chr(lm,
                       data,
                       work_dir,
                       'results.txt',
                       create_name(config),
                       rescore_fun=lambda x: lm.rescore(session, x))