def main(_):
    """Train and evaluate the CNN model on the SemEval record data.

    Loads the word embedding via ``dataset.VocabMgr``, builds the train and
    test inputs from ``semeval_v2.SemEvalCleanedRecordData``, constructs a
    ``cnn_model.CNNModel`` and delegates the whole run to its
    ``train_and_eval`` method.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the app runner -- TODO confirm against the caller).
    """
    embedding = dataset.VocabMgr().load_embedding()

    # The record reader is built with ``None`` in place of a text source.
    records = semeval_v2.SemEvalCleanedRecordData(None)

    epochs = FLAGS.num_epochs
    batch = FLAGS.batch_size

    # Training input uses the configured epoch count; test input uses 1.
    train_data = records.train_data(epochs, batch)
    test_data = records.test_data(1, batch)

    # Disabled: model_name = 'cnn-%d-%d' % (FLAGS.word_dim, FLAGS.num_epochs)
    model = cnn_model.CNNModel(embedding, FLAGS.is_adv)

    # Disabled debugging aid:
    # for tensor in tf.trainable_variables():
    #     tf.logging.info(tensor.op.name)

    # NOTE(review): the second argument is hard-coded to 80; its meaning is
    # defined by CNNModel.train_and_eval, which is not visible here -- confirm.
    model.train_and_eval(epochs, 80, FLAGS.lrn_rate, train_data, test_data)
def main(_):
    """Build the train/valid CNN models in a fresh graph and run them.

    Loads the word embedding, creates the SemEval train/test iterators inside
    a new ``tf.Graph``, builds the paired models with
    ``cnn_model.build_train_valid_model`` and then, inside a session, either
    calls ``test`` (when ``FLAGS.is_test`` is set) or ``train_semeval``.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by the app runner -- TODO confirm against the caller).
    """
    embedding = dataset.VocabMgr().load_embedding()

    # nyt_record is only referenced by the disabled unsupervised path below;
    # the constructor call is kept so any side effects it has are unchanged.
    nyt_record = nyt2010.NYT2010CleanedRecordData(None)
    semeval_record = semeval_v2.SemEvalCleanedRecordData(None)

    # Session options do not depend on the graph, so they are prepared first.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    model_name = 'cnn-%d-%d' % (FLAGS.word_dim, FLAGS.num_epochs)

    with tf.Graph().as_default():
        train_iter = semeval_record.train_data(
            FLAGS.num_epochs, FLAGS.batch_size)
        test_iter = semeval_record.test_data(1, FLAGS.batch_size)
        # Disabled unsupervised input:
        # unsup_iter = nyt_record.unsup_data(FLAGS.num_epochs, FLAGS.batch_size)

        train_batch = train_iter.get_next()
        test_batch = test_iter.get_next()
        # unsup_batch = unsup_iter.get_next()
        unsup_batch = None  # no unsupervised data is fed to the model

        m_train, m_valid = cnn_model.build_train_valid_model(
            model_name,
            embedding,
            train_batch,
            test_batch,
            unsup_batch,
            FLAGS.is_adv,
            FLAGS.is_test,
        )

        # Local variables are initialised as well ("for file queue" per the
        # original author's note).
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )

        # Log the op name of every trainable variable in the graph.
        for variable in tf.trainable_variables():
            tf.logging.info(variable.op.name)

        with tf.Session(config=session_config) as sess:
            sess.run(init_op)
            print('=' * 80)

            if not FLAGS.is_test:
                train_semeval(sess, m_train, m_valid, test_iter)
            else:
                test(sess, m_valid, test_iter)
# Preprocessing script: reports length statistics, generates the vocabulary,
# trims the pre-trained embedding and writes the SemEval record data.
# The NYT2010 steps are kept below in disabled (commented-out) form.
import tensorflow as tf

from inputs import dataset
from inputs import semeval_v2
from inputs import nyt2010

tf.logging.set_verbosity(tf.logging.INFO)

semeval_text = semeval_v2.SemEvalCleanedTextData()
# nyt_text = nyt2010.NYT2010CleanedTextData()

# Step 1: length statistics (see the recorded log output at the bottom).
semeval_text.length_statistics()
# nyt_text.length_statistics()

# Step 2: generate the vocabulary from the SemEval tokens.
vocab_mgr = dataset.VocabMgr()
vocab_mgr.generate_vocab(semeval_text.tokens())

# Step 3: trim the pre-trained embedding.
vocab_mgr.trim_pretrain_embedding()

# Step 4: attach the vocabulary to the text data and generate the SemEval
# record data from it.
semeval_text.set_vocab_mgr(vocab_mgr)
semeval_record = semeval_v2.SemEvalCleanedRecordData(semeval_text)
semeval_record.generate_data()

# Disabled: the same record generation for the NYT data.
# nyt_text.set_vocab_mgr(vocab_mgr)
# nyt_record = nyt2010.NYT2010CleanedRecordData(nyt_text)
# nyt_record.generate_data()

# Recorded output of two length_statistics() runs (presumably SemEval first,
# then NYT -- the source does not say which is which):
# INFO:tensorflow:(percent, quantile) [(50, 17.0), (70, 21.0), (80, 24.0), (90, 29.0), (95, 33.0), (98, 40.0), (100, 98.0)]
# INFO:tensorflow:(percent, quantile) [(50, 39.0), (70, 47.0), (80, 53.0), (90, 62.0), (95, 71.0), (98, 84.0), (100, 9621.0)]