Ejemplo n.º 1
0
def main(_):
    """Train and evaluate the CNN model on the SemEval record data.

    Args:
        _: Ignored; never read by this function.
    """
    # Word embedding loaded through the vocabulary manager.
    embedding = dataset.VocabMgr().load_embedding()
    records = semeval_v2.SemEvalCleanedRecordData(None)

    # The training split is requested with FLAGS.num_epochs; the test split
    # is requested with 1 in the same argument position.
    train_set = records.train_data(FLAGS.num_epochs, FLAGS.batch_size)
    test_set = records.test_data(1, FLAGS.batch_size)

    classifier = cnn_model.CNNModel(embedding, FLAGS.is_adv)

    # NOTE(review): the meaning of the literal 80 is not visible from this
    # function -- confirm against CNNModel.train_and_eval.
    classifier.train_and_eval(
        FLAGS.num_epochs, 80, FLAGS.lrn_rate, train_set, test_set)
Ejemplo n.º 2
0
def main(_):
  """Build the CNN train/valid models on SemEval data, then train or test.

  Calls `test` when FLAGS.is_test is truthy and `train_semeval` otherwise.

  Args:
    _: Ignored; never read by this function.
  """
  embedding = dataset.VocabMgr().load_embedding()
  # Constructed but not used further in this function; the unsupervised
  # input is passed to the model builder as None below.
  nyt_records = nyt2010.NYT2010CleanedRecordData(None)
  semeval_records = semeval_v2.SemEvalCleanedRecordData(None)

  with tf.Graph().as_default():
    train_iterator = semeval_records.train_data(
        FLAGS.num_epochs, FLAGS.batch_size)
    test_iterator = semeval_records.test_data(1, FLAGS.batch_size)

    name = 'cnn-%d-%d' % (FLAGS.word_dim, FLAGS.num_epochs)
    train_batch = train_iterator.get_next()
    test_batch = test_iterator.get_next()
    # The fifth argument (unsupervised data) is None.
    train_model, valid_model = cnn_model.build_train_valid_model(
        name, embedding, train_batch, test_batch, None,
        FLAGS.is_adv, FLAGS.is_test)

    # Local variables are initialised alongside the global ones (the
    # original note says this is "for file queue").
    initializers = (tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    init_op = tf.group(*initializers)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    # Log the op name of every trainable variable.
    for variable in tf.trainable_variables():
      tf.logging.info(variable.op.name)

    with tf.Session(config=session_config) as sess:
      sess.run(init_op)
      print('=' * 80)

      if not FLAGS.is_test:
        train_semeval(sess, train_model, valid_model, test_iterator)
      else:
        test(sess, valid_model, test_iterator)
Ejemplo n.º 3
0
import tensorflow as tf
from inputs import dataset, semeval_v2, nyt2010

# Preprocessing script: builds the vocabulary from the SemEval tokens, trims
# the pretrained embedding, and generates the SemEval record data. The NYT
# steps are commented out. Statements run in order at module level.

# Set TensorFlow logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

semeval_text = semeval_v2.SemEvalCleanedTextData()
# nyt_text = nyt2010.NYT2010CleanedTextData()

# Length statistics.
# NOTE(review): presumably this logs the (percent, quantile) lines recorded
# at the bottom of this script -- confirm in length_statistics().
semeval_text.length_statistics()
# nyt_text.length_statistics()

# Generate the vocabulary from the SemEval tokens only.
vocab_mgr = dataset.VocabMgr()
vocab_mgr.generate_vocab(semeval_text.tokens())

# Trim the pretrained embedding.
# NOTE(review): presumably restricted to the vocabulary generated above --
# confirm in VocabMgr.trim_pretrain_embedding().
vocab_mgr.trim_pretrain_embedding()

# Build the SemEval record data: attach the vocabulary manager to the text
# data, wrap the text data in a record-data object, then generate.
semeval_text.set_vocab_mgr(vocab_mgr)
semeval_record = semeval_v2.SemEvalCleanedRecordData(semeval_text)
semeval_record.generate_data()

# Build the NYT record data (disabled).
# nyt_text.set_vocab_mgr(vocab_mgr)
# nyt_record = nyt2010.NYT2010CleanedRecordData(nyt_text)
# nyt_record.generate_data()

# Log output recorded from an earlier run.
# NOTE(review): presumably the first line is SemEval and the second is NYT,
# matching the order of the length_statistics() calls above -- confirm.
# INFO:tensorflow:(percent, quantile) [(50, 17.0), (70, 21.0), (80, 24.0), (90, 29.0), (95, 33.0), (98, 40.0), (100, 98.0)]
# INFO:tensorflow:(percent, quantile) [(50, 39.0), (70, 47.0), (80, 53.0), (90, 62.0), (95, 71.0), (98, 84.0), (100, 9621.0)]