def main(mode):
    """Build a TextCNN and either train or evaluate it, depending on *mode*.

    ``'train'`` fits the model on the training split and passes the test
    split along as the second argument to ``model.train``; ``'test'`` calls
    ``model.test`` on the test split with the identifier ``'1528038283'``
    (presumably a saved-run/checkpoint id -- confirm against
    ``TextCNN.test``). Any other value is a no-op and returns ``None``.

    Relies on the module-level names ``embedding_weights``, ``x_train``,
    ``y_train``, ``x_test`` and ``y_test`` being defined before the call.
    """
    # Guard clause: unknown modes do nothing, and no model is constructed.
    if mode not in ('train', 'test'):
        return

    # Both modes need the same freshly built graph.
    model = TextCNN(embedding_weights)
    model.bulid_graph()  # (sic) spelling matches the method name used by the model class

    if mode == 'train':
        model.train((x_train, y_train), (x_test, y_test))
    else:
        model.test((x_test, y_test), '1528038283')
# -*- coding: utf-8 -*-
# Script entry point: trains the TextCNN model and then runs its test step.

# Standard library.
import os

# Third-party packages.
import tensorflow.keras as keras
import numpy as np
from sklearn import metrics

# Project modules (kept in their original relative order).
from preprocess import preprocesser
from config import Config
from model import TextCNN, LSTM

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(42)

if __name__ == '__main__':
    # The argument 5 is passed straight to TextCNN.train
    # (presumably the number of epochs -- confirm against the model class).
    CNN_model = TextCNN()
    CNN_model.train(5)
    CNN_model.test()

    # Alternative run with the LSTM model, currently disabled:
    # LSTM_MODEL = LSTM()
    # LSTM_MODEL.train(5)
    # LSTM_MODEL.test()
def main(args):
    """Train a TextCNN/TextRNN sentiment classifier and evaluate it each epoch.

    Reads the labeled reviews from ``data/labeledTrainData.tsv.zip``, turns
    them into word lists and index vectors through the ``utils`` helpers
    (``review_to_wordlist``, ``build_vocab``, ``vectorize``), trains on the
    first 20000 examples and, after every epoch, prints the accuracy on the
    remaining examples. Per-step summaries are written to ``args.log_dir``.

    Args:
        args: Options object. The attributes read here are ``nb_classes``,
            ``save_dir``, ``log_dir``, ``model_type`` (``"cnn"``, ``"rnn"``
            or ``"bi_rnn"``), ``max_size``, ``batch_size``, ``nb_epochs``
            and ``keep_prob``. The whole object is also handed to the model
            constructor.

    Raises:
        ValueError: If ``args.model_type`` is not a supported model type.
            (Previously this surfaced later as a ``NameError``.)
    """
    from contextlib import closing

    rnn_types = ("rnn", "bi_rnn")
    is_rnn = args.model_type in rnn_types
    if args.model_type != "cnn" and not is_rnn:
        raise ValueError("unsupported model_type: %r" % (args.model_type,))

    print("loadding reviews and labels from dataset")
    data = pd.read_csv('data/labeledTrainData.tsv.zip',
                       compression='zip', delimiter='\t',
                       header=0, quoting=3)

    # Filter reviews and labels together: dropping an empty review without
    # dropping its label would shift every later label by one position.
    sentences = []
    labels = []
    for review, sentiment in zip(data["review"], data["sentiment"]):
        if len(review) == 0:
            continue
        sentences.append(
            utils.review_to_wordlist(review.decode('utf8').strip(),
                                     remove_stopwords=True))
        labels.append(sentiment)
    print("loaded %d reviews from dataset" % len(sentences))

    word_dict = utils.build_vocab(sentences, max_words=10000)
    vec_reviews = utils.vectorize(sentences, word_dict, verbose=True)

    # Fixed split: the first 20000 examples train, the rest evaluate.
    train_size = 20000
    train_x = vec_reviews[:train_size]
    train_y = utils.one_hot(labels[:train_size], args.nb_classes)
    test_x = vec_reviews[train_size:]
    test_y = utils.one_hot(labels[train_size:], args.nb_classes)

    save_dir = args.save_dir
    log_dir = args.log_dir
    # NOTE(review): save_dir is created but not otherwise used in this function.
    for directory in (save_dir, log_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # RNN variants ask get_batches for its "rnn" batch layout.
    batch_kwargs = {"type": "rnn"} if is_rnn else {}

    with tf.Graph().as_default():
        config_proto = utils.get_config_proto()
        # The context manager closes the session even if training raises.
        with tf.Session(config=config_proto) as sess:
            if is_rnn:
                model = TextRNN(args, "TextRNN")
            else:
                model = TextCNN(args, "TextCNN")

            # NOTE(review): get_batches is not visible here. If it returns a
            # one-shot generator, re-iterating it each epoch would yield no
            # test batches after the first epoch (and divide by zero below).
            # Materializing it once makes it reusable in either case.
            test_batch = list(utils.get_batches(
                test_x, test_y, args.max_size, **batch_kwargs))

            sess.run(tf.global_variables_initializer())

            # closing() flushes and closes the event file on exit.
            with closing(tf.summary.FileWriter(log_dir, sess.graph)) \
                    as summary_writer:
                for epoch in range(1, args.nb_epochs + 1):
                    print("epoch %d start" % epoch)
                    print("- " * 50)

                    loss = 0.
                    total_reviews = 0
                    accuracy = 0.

                    # Training batches are rebuilt for every epoch.
                    train_batch = utils.get_batches(
                        train_x, train_y, args.batch_size, **batch_kwargs)

                    epoch_start_time = time.time()
                    step_start_time = epoch_start_time
                    for batch_reviews, batch_lengths, batch_labels in train_batch:
                        (_, loss_t, accuracy_t, global_step,
                         batch_size, summaries) = model.train(
                            sess, batch_reviews, batch_lengths, batch_labels,
                            args.keep_prob)

                        # Weight by batch size so the epoch figures are
                        # per-example averages.
                        loss += loss_t * batch_size
                        total_reviews += batch_size
                        accuracy += accuracy_t * batch_size
                        summary_writer.add_summary(summaries, global_step)

                        if global_step % 50 == 0:
                            print("epoch %d, step %d, loss %f, accuracy %.4f, time %.2fs" %
                                  (epoch, global_step, loss_t, accuracy_t,
                                   time.time() - step_start_time))
                            step_start_time = time.time()

                    epoch_time = time.time() - epoch_start_time
                    print("%.2f seconds in this epoch" % (epoch_time))
                    print("train loss %f, train accuracy %.4f" % (
                        loss / total_reviews, accuracy / total_reviews))

                    # Evaluate on the held-out examples; the final argument
                    # 1.0 goes where training passes args.keep_prob.
                    total_reviews = 0
                    accuracy = 0.
                    for batch_reviews, batch_lengths, batch_labels in test_batch:
                        accuracy_t, batch_size = model.test(
                            sess, batch_reviews, batch_lengths, batch_labels, 1.0)
                        total_reviews += batch_size
                        accuracy += accuracy_t * batch_size
                    print("accuracy %.4f in %d test reviews" % (
                        accuracy / total_reviews, total_reviews))