import os
import sys

import tensorflow as tf

# Project-local modules assumed from the surrounding TRF-NCE codebase:
# trfnce, trfbase, wb (toolbox), reader, lstmlm, run_lstmlm, task.


def main(_):
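    # 'data' is assumed to be a module-level reader.Data instance prepared
    # before main() runs; Example 2 below shows the loading code.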
    config = trfnce.Config(data)

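    # Potential-function network: 'mix' combines the CNN and RNN feature
    # extractors configured below, with attention enabled.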
    config.structure_type = 'mix'
    config.embedding_dim = 128
    config.cnn_filters = [(i, 128) for i in range(1, 5)]
    config.cnn_hidden = 128
    config.cnn_layers = 1
    config.cnn_skip_connection = False
    config.cnn_residual = True
    config.cnn_activation = 'relu'
    config.rnn_hidden_layers = 1
    config.rnn_hidden_size = 128
    config.attention = True

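    # NCE settings: noise_factor is the number of noise samples drawn per
    # data sample; noise_sampler=2 selects a 2-gram noise distribution
    # (cf. NoiseSamplerNgram(config, data, 2) and the 'lstm:...' form in
    # Example 3). The two optimize_method entries pair with lr_param and
    # lr_zeta, i.e. the model parameters and the log-normalization terms.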
    config.batch_size = 100
    config.noise_factor = 2
    config.noise_sampler = 2
    config.init_weight = 0.1
    config.optimize_method = ['sgd', 'sgd']
    config.lr_param = trfbase.LearningRateEpochDelay(1e-2, 0.5)
    config.lr_zeta = trfbase.LearningRateEpochDelay(1e-2, 0.5)
    config.max_epoch = 10
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(0)
    config.update_zeta = True
    config.write_dbg = False
    config.pprint()

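    # q_config configures an auxiliary LSTM LM attached to the TRF as the
    # q model; leave it as None to train the TRF alone (see Example 3).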
    q_config = run_lstmlm.small_config(data)
    # q_config = None

    name = create_name(config, q_config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        if q_config is None:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        else:
            m = trfnce.TRF(config,
                           data,
                           logdir=logdir,
                           device='/gpu:1',
                           q_model=lstmlm.LM(q_config, device='/gpu:1'))
        # noise_lstm = lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:1')
        # m.lstm = noise_lstm

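        # The Supervisor handles checkpointing and summary writing under
        # logdir/logs and restores automatically on restart.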
        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(
            tf.get_default_graph())  # write the graph to logs
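        # allow_soft_placement lets ops without a GPU kernel fall back to
        # the CPU; allow_growth stops TF from reserving all GPU memory up
        # front.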
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            if m.q_model is not None:
                print('load lstmlm for q model')
                m.q_model.restore(
                    session, './lstm/' + run_lstmlm.create_name(q_config) +
                    '/model.ckpt')

            m.train(
                sv,
                session,
                print_per_epoch=0.1,
                operation=task.Ops(m),
                # nbest=nbest,
                # lmscale_vec=np.linspace(1, 20, 20)
            )

# ---- Example 2 ----

def main(_):
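    # Load PTB with explicit sentence-boundary tokens ('add_unknwon_token'
    # is the reader API's own spelling), plus the WSJ0 n-best lists used
    # for rescoring evaluation.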
    data = reader.Data().load_raw_data(reader.ptb_raw_dir(),
                                       add_beg_token='<s>',
                                       add_end_token='</s>',
                                       add_unknwon_token='<unk>')
    nbest = reader.NBest(*reader.wsj0_nbest())
    nbest_list = data.load_data(nbest.nbest, is_nbest=True)
    print('nbest list info=', wb.TxtInfo(nbest.nbest))

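    # RNN-structured TRF with a large noise_factor; zeta is kept at its
    # initial value (update_zeta=False).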
    config = trfnce.Config(data)
    config.structure_type = 'rnn'
    config.embedding_dim = 200
    config.rnn_hidden_layers = 2
    config.rnn_hidden_size = 200
    config.batch_size = 20
    config.noise_factor = 100
    config.noise_sampler = 2
    config.init_weight = 0.1
    config.lr_param = trfbase.LearningRateTime(1e-3)
    config.max_epoch = 100
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(20)
    config.update_zeta = False
    config.write_dbg = False
    config.pprint()

    name = create_name(config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')
    data.write_data(nbest_list, logdir + '/nbest.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        # noise_lstm = lstmlm.LM(run_lstmlm_withBegToken.small_config(data), device='/gpu:1')
        # m.lstm = noise_lstm

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(
            tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            m.set_session(session)

            # print('load lstmlm for noise generator')
            # noise_lstm.restore(session,
            #                    './lstm/' + run_lstmlm_withBegToken.create_name(noise_lstm.config) + '/model.ckpt')

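            # Passing nbest/nbest_list lets training evaluate by rescoring
            # the WSJ0 n-best lists.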
            m.train(sv,
                    session,
                    print_per_epoch=0.1,
                    nbest=nbest,
                    nbest_list=nbest_list)

# ---- Example 3 ----

def main(_):
    config = trfnce.Config(data)
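    # CNN-structured potential: filter widths 1-10 with 100 filters each,
    # plus an RNN prediction head (rnn_predict=True) on the CNN features.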
    config.structure_type = 'cnn'
    config.embedding_dim = 200
    config.cnn_filters = [(i, 100) for i in range(1, 11)]
    config.cnn_width = 3
    config.cnn_layers = 3
    config.cnn_hidden = 200
    config.rnn_hidden_layers = 2
    config.rnn_hidden_size = 200
    config.rnn_predict = True
    config.batch_size = 10
    config.noise_factor = 10
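    # 'lstm:<checkpoint>' draws noise samples from a pre-trained LSTM LM
    # restored from that checkpoint (cf. NoiseSamplerLSTMEval below).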
    config.noise_sampler = 'lstm:lstm/lstm_e200_h200x2/model.ckpt'
    config.init_weight = 0.1
    config.optimize_method = ['adam', 'adam']
    config.lr_param = trfbase.LearningRateEpochDelay(0.001)
    config.lr_zeta = trfbase.LearningRateEpochDelay(0.01)
    config.max_epoch = 100
    # config.dropout = 0.75
    # config.init_zeta = config.get_initial_logz(20)
    config.update_zeta = True
    config.write_dbg = False
    config.pprint()

    # q_config = run_lstmlm.small_config(data)
    q_config = None

    name = create_name(config, q_config)
    logdir = 'trf_nce/' + name
    wb.mkdir(logdir, is_recreate=True)
    sys.stdout = wb.std_log(os.path.join(logdir, 'trf.log'))
    print(logdir)

    data.write_vocab(logdir + '/vocab.txt')
    data.write_data(data.datas[1], logdir + '/valid.id')
    data.write_data(data.datas[2], logdir + '/test.id')

    # wb.rmdir(logdirs)
    with tf.Graph().as_default():
        if q_config is None:
            m = trfnce.TRF(config, data, logdir=logdir, device='/gpu:0')
        else:
            m = trfnce.TRF(config,
                           data,
                           logdir=logdir,
                           device='/gpu:0',
                           q_model=lstmlm.LM(q_config, device='/gpu:0'))

        # s1 = trfnce.NoiseSamplerNgram(config, data, 2)
        # s2 = trfnce.NoiseSamplerLSTMEval(config, data, config.noise_sampler.split(':')[-1])

        sv = tf.train.Supervisor(logdir=os.path.join(logdir, 'logs'),
                                 global_step=m.train_net.global_step)
        sv.summary_writer.add_graph(
            tf.get_default_graph())  # write the graph to logs
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        with sv.managed_session(config=session_config) as session:
            with session.as_default():

                if m.q_model is not None:
                    print('load lstmlm for q model')
                    m.q_model.restore(
                        session, './lstm/' + run_lstmlm.create_name(q_config) +
                        '/model.ckpt')

                m.train(
                    sv,
                    session,
                    print_per_epoch=0.1,
                    operation=task.Ops(m),
                )