예제 #1
0
def main(_):
    """Prepare the experiment directory, dump the data files and train the TRF.

    Steps, in order:
      1. Build the configuration and derive the run name from it.
      2. Create the output directory under ``./trf_nn_char/`` and replace
         ``sys.stdout`` with the object returned by ``wb.std_log``.
      3. Write the vocabulary files, the three ``data.datas`` splits and the
         n-best list into the output directory.
      4. Build ``trf.TRF`` inside a fresh graph and call its ``train`` method
         within a ``tf.train.Supervisor`` managed session.

    The single positional argument is ignored (presumably the argv list
    handed over by the application runner -- confirm against the caller).
    """
    cfg = get_config()
    # NOTE(review): is_recreate=False presumably keeps an existing directory
    # instead of wiping it -- confirm against wb.mkdir.
    out_dir = wb.mkdir('./trf_nn_char/' + create_name(cfg), is_recreate=False)
    sys.stdout = wb.std_log(out_dir + '/trf.log')
    cfg.pprint()
    print(out_dir)

    # Disabled experiment: load fixed, pre-computed word embeddings.
    # cfg.config_trf.load_embedding_path = os.path.join(out_dir, 'word_emb.txt')
    # cfg.config_trf.update_embedding = False
    # data.word2vec(cfg.config_trf.load_embedding_path, cfg.config_trf.embedding_dim, 0)

    # Dump vocabularies first, then the id files of every split.
    data.write_vocab(out_dir + '/vocab.txt')
    char_vocab_path = out_dir + '/vocab_char.txt'
    word_to_char_path = out_dir + '/vocab_w2c.txt'
    data.write_char_vocab(char_vocab_path, word_to_char_path)
    for split_idx, id_file in enumerate(('/train.id', '/valid.id', '/test.id')):
        data.write_data(data.datas[split_idx], out_dir + id_file)
    data.write_data(nbest_list, out_dir + '/nbest.id')

    with tf.Graph().as_default():
        model = trf.TRF(cfg, data, logdir=out_dir, device=['/gpu:0'])

        summary_dir = out_dir + '/logs'
        supervisor = tf.train.Supervisor(
            logdir=summary_dir,
            summary_op=None,
            global_step=model.global_steps,
        )
        # supervisor.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

        sess_cfg = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
        )
        sess_cfg.gpu_options.allow_growth = True

        with supervisor.managed_session(config=sess_cfg) as sess:
            model.set_session(sess)

            # Disabled debugging / warm-up snippets kept for reference:
            # print(model.get_log_probs(data.datas[1][1]))
            #
            # train_seqs = data.datas[0][0: cfg.config_trf.train_batch_size]
            # sample_seqs = data.datas[0][0: cfg.config_trf.sample_batch_size]
            # inputs, lengths = reader.produce_data_to_trf(train_seqs + sample_seqs)
            # model.net_trf.run_train(sess, inputs, lengths, len(train_seqs))
            #
            # model.pre_train(supervisor, sess, batch_size=20, max_epoch=10, lr=1e-3)

            train_kwargs = dict(print_per_epoch=0.1,
                                nbest=nbest,
                                nbest_list=nbest_list)
            model.train(supervisor, sess, **train_kwargs)
예제 #2
0
def main(_):
    """Create the run directory, export the data and train the TRF model.

    The function builds the configuration, creates a directory below
    ``./trf_cnn_new/`` named after that configuration, replaces
    ``sys.stdout`` with the object returned by ``wb.std_log``, writes the
    vocabulary, the three ``data.datas`` splits and the n-best list to disk,
    and finally calls ``train`` on a ``trf.TRF`` instance inside a
    ``tf.train.Supervisor`` managed session.

    The positional argument is unused (presumably argv supplied by the
    application runner -- confirm against the caller).
    """
    # Disabled quick inspection of the data set:
    # print(data.datas[0][0: 10])
    # print(data.get_max_len())
    # return

    settings = get_config()
    run_name = create_name(settings)
    # NOTE(review): is_recreate=True presumably wipes an existing directory --
    # confirm against wb.mkdir.
    run_dir = wb.mkdir('./trf_cnn_new/' + run_name, is_recreate=True)
    sys.stdout = wb.std_log(run_dir + '/trf.log')
    settings.pprint()
    print(run_dir)

    # Disabled experiment: load fixed, pre-computed word embeddings.
    # settings.config_trf.load_embedding_path = os.path.join(run_dir, 'word_emb.txt')
    # settings.config_trf.update_embedding = False
    # data.word2vec(settings.config_trf.load_embedding_path, settings.config_trf.embedding_dim, 0)

    # Export the vocabulary, then each data split, then the n-best list.
    data.write_vocab(run_dir + '/vocab.txt')
    split_files = ('/train.id', '/valid.id', '/test.id')
    for position, suffix in enumerate(split_files):
        data.write_data(data.datas[position], run_dir + suffix)
    data.write_data(nbest_list, run_dir + '/nbest.id')

    with tf.Graph().as_default():
        model = trf.TRF(settings, data, logdir=run_dir, device='/gpu:0')

        supervisor = tf.train.Supervisor(
            logdir=run_dir + '/logs',
            summary_op=None,
            global_step=model.global_steps,
        )
        # supervisor.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

        proto = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
        proto.gpu_options.allow_growth = True

        with supervisor.managed_session(config=proto) as sess:
            model.set_session(sess)

            # Disabled pre-training step:
            # model.pre_train(supervisor, sess, batch_size=100, max_epoch=3, lr=1.)

            model.train(
                supervisor,
                sess,
                print_per_epoch=0.1,
                nbest=nbest,
                nbest_list=nbest_list,
            )
예제 #3
0
def main(_):
    """Run one TRF training job that writes its output below ``./trf_nn_char/``.

    Order of operations: build the configuration, create the run directory
    named after it, replace ``sys.stdout`` with the object returned by
    ``wb.std_log``, write the vocabulary files plus the ``data.datas`` splits
    and the n-best list, then construct ``trf.TRF`` in a new graph and call
    its ``train`` method inside a ``tf.train.Supervisor`` managed session.

    The positional argument is ignored (presumably argv from the
    application runner -- confirm against the caller).
    """
    conf = get_config()
    # NOTE(review): is_recreate=True presumably wipes an existing directory --
    # confirm against wb.mkdir.
    workdir = wb.mkdir('./trf_nn_char/' + create_name(conf), is_recreate=True)
    sys.stdout = wb.std_log(workdir + '/trf.log')
    conf.pprint()
    print(workdir)

    # Write the vocabulary files, then the id files for each split.
    data.write_vocab(workdir + '/vocab.txt')
    data.write_char_vocab(workdir + '/vocab_char.txt', workdir + '/vocab_w2c.txt')
    id_files = ('/train.id', '/valid.id', '/test.id')
    for which in range(len(id_files)):
        data.write_data(data.datas[which], workdir + id_files[which])
    data.write_data(nbest_list, workdir + '/nbest.id')

    with tf.Graph().as_default():
        net = trf.TRF(conf, data, logdir=workdir, device=['/gpu:0'])

        log_path = workdir + '/logs'
        step_tensor = net.global_steps
        coordinator = tf.train.Supervisor(logdir=log_path,
                                          summary_op=None,
                                          global_step=step_tensor)
        # coordinator.summary_writer.add_graph(tf.get_default_graph())  # write the graph to logs

        run_options = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False)
        run_options.gpu_options.allow_growth = True

        with coordinator.managed_session(config=run_options) as active_session:
            net.set_session(active_session)

            # Disabled sanity check:
            # print(net.get_log_probs(data.datas[1][1]))

            net.train(coordinator, active_session,
                      print_per_epoch=0.1,
                      nbest=nbest,
                      nbest_list=nbest_list)