def train():
    """Train the BiLSTM text classifier and evaluate it periodically.

    Loads the training and test sets through ``data_helper``, builds a
    ``bilstm_text`` model from the module-level hyper-parameters and runs
    50 epochs of mini-batch training.  The training accuracy is printed for
    every batch; every 50th batch the predictions for the current batch are
    printed and ``test`` is called.  ``test`` is called again after each
    epoch.

    NOTE(review): the original text had lost its indentation, so the nesting
    below (counter incremented once per batch, evaluation at the end of each
    epoch) is a reconstruction -- confirm against the intended behaviour.
    """
    # NOTE(review): the literals 10000 / 35 / 64 presumably mirror the
    # module-level voc_size / seqlen / batch_size -- confirm before unifying.
    train_x, train_y, words_dict, labels_dict, seqlen_all = data_helper.load(
        "train.txt", 10000, 35)
    test_x, test_y, seqlen_test = data_helper.load_test_data(
        "test_filter_2.txt", seqlen, words_dict, labels_dict)

    model = bilstm_text(voc_size, batch_size, seqlen, n_class,
                        embedding_size, learn_rate)
    op_pred = model.pred
    op_train = model.train_op
    op_acc = model.acc

    sess = tf.Session()
    try:
        # tf.initialize_all_variables() is deprecated; this is its replacement.
        sess.run(tf.global_variables_initializer())

        epoachs = 50
        cnt = 0
        for epoach in range(epoachs):
            batchs = data_helper.get_batch(64, train_x, train_y, seqlen_all)
            for batch_x, batch_y, batch_len in batchs:
                feed = {
                    model.inputs: batch_x,
                    model.outputs: batch_y,
                    model.seqlen_hdr: batch_len,
                }
                _, train_acc = sess.run([op_train, op_acc], feed_dict=feed)
                print("{0} epoach {1} iters acc = {2}".format(epoach, cnt, train_acc))
                if cnt % 50 == 0:
                    # Periodic sanity check on the current batch plus a test run.
                    tmp_pred = sess.run(op_pred, feed_dict=feed)
                    print(tmp_pred)
                    test(model, test_x, test_y, seqlen_test)
                cnt += 1
            print("---------test----------------")
            test(model, test_x, test_y, seqlen_test)
    finally:
        # Release the session's resources even if training raises.
        sess.close()
# Timestamp layout for log records, e.g. "Mon 01 Jan 2018 12:00:00".
datefmt = "%a %d %b %Y %H:%M:%S"
# `fmt`, `fh` and `logger` are defined before this excerpt.
formatter = logging.Formatter(fmt, datefmt)
fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
# Embedding table plus the two vocabulary mappings returned by load_embedding.
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file, FLAGS.embedding_size)
# Training data, loaded with FLAGS.num_unroll_steps as the third argument.
ori_quests, cand_quests, neg_quests, cat_ids = load_train_data(
    FLAGS.train_file, word2idx, FLAGS.num_unroll_steps)
# Test data from FLAGS.test_file.
test_ori_quests, test_cand_quests, labels, results, test_cat_ids = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.num_unroll_steps)
# A second set loaded with the *test* loader from FLAGS.train_LONG.
# NOTE(review): presumably training data in test format -- confirm.
for_test_ori_quests, for_test_cand_quests, for_labels, for_results, for_test_cat_ids = load_test_data(
    FLAGS.train_LONG, word2idx, FLAGS.num_unroll_steps)
#test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_TRAIN, word2idx, FLAGS.num_unroll_steps)
#----------------------------------- load data end ----------------------

def onehot_encoder(cat_ids_batch):
    """Return one-hot rows for ``cat_ids_batch``.

    Indexes a CAT_NUMBER x CAT_NUMBER identity matrix with the given ids, so
    each id must be a valid row index (CAT_NUMBER is defined elsewhere).
    """
    return np.eye(CAT_NUMBER)[cat_ids_batch]

#----------------------------------- execute train model ---------------------------------
# NOTE: the signature below continues beyond this excerpt.
def run_step(sess, ori_batch,
# Timestamp layout for log records, e.g. "Mon 01 Jan 2018 12:00:00".
datefmt = "%a %d %b %Y %H:%M:%S"
# `fmt`, `fh` and `logger` are defined before this excerpt.
formatter = logging.Formatter(fmt, datefmt)
fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
# Embedding table plus the two vocabulary mappings returned by load_embedding.
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file, FLAGS.embedding_size)
# Training pairs, loaded with FLAGS.sequence_len as the third argument.
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx, FLAGS.sequence_len)
#train_quests, valid_quests = create_valid(zip(ori_quests, cand_quests))
# Test data from FLAGS.test_file.
test_ori_quests, test_cand_quests, labels, results = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.sequence_len)
#----------------------------------- load data end ----------------------

#----------------------------------- build model --------------------------------------
# FLAGS.filter_sizes is a comma-separated string; convert it to a list of ints.
filter_sizes = [
    int(filter_size.strip())
    for filter_size in FLAGS.filter_sizes.strip().split(",")
]
#----------------------------------- build model end ----------------------------------

#----------------------------------- execute train model ---------------------------------
# NOTE: the signature below continues beyond this excerpt.
def run_step(sess, ori_batch, cand_batch,
# Classifier head: FLATTEN => FC(500) => RELU => FC(nb_classes) => SOFTMAX.
for head_layer in (
        Flatten(),
        Dense(500),
        Activation("relu"),
        # softmax classifier
        Dense(nb_classes),
        Activation("softmax"),
):
    model.add(head_layer)

# Plain SGD on categorical cross-entropy, tracking accuracy.
sgd = SGD(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

# Train, using the held-out split for validation after each epoch.
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          verbose=1,
          validation_data=(X_test, Y_test))

# evaluate() returns [loss, accuracy] for the metrics compiled above.
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

# Prepare the submission images the same way: one channel, float32, scaled by 255.
X_val = load_test_data("test.csv")
n_val = X_val.shape[0]
X_val = X_val.reshape(n_val, 1, img_rows, img_cols).astype('float32')
X_val /= 255
print(X_val.shape, 'Validation test samples')

# Predict class labels and write them to the submission file.
preds = model.predict_classes(X_val, batch_size=batch_size, verbose=1)
save_preds(preds, "lenet_submission.csv")
#full connect layers
h_drop = tf.nn.dropout(pool_flat,keep_prob)
# NOTE(review): the first dimension (4) must match the width of pool_flat,
# which is defined before this excerpt -- confirm.
full_W = tf.Variable(tf.truncated_normal([4,n_class],stddev=0.1 ,dtype=tf.float32))
full_B = tf.Variable(tf.constant(0.1,dtype=tf.float32))
# Keep the unscaled logits separate from the softmax output: the
# cross-entropy op applies softmax internally, so feeding it probabilities
# (as the code used to) applies softmax twice and distorts the loss.
fc_logits = tf.matmul(h_drop,full_W)+full_B
outputs = tf.nn.softmax(fc_logits)
pred = tf.argmax(outputs,1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc_logits,labels=labels))
acc = tf.reduce_mean(tf.cast(tf.equal(pred,tf.argmax(labels,1)),tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

train_x, train_y, words_dict, labels_dict, all_len = data_helper.load("data/train.txt",1000,s_limit_len)
test_x,test_y, testlen = data_helper.load_test_data("data/test_filter_2.txt",s_limit_len,words_dict,labels_dict)


def test(sess,acc,pred,tes_x,test_y):
    """Print a classification report for the given evaluation set.

    Args:
        sess: session used to run the prediction op.
        acc: accuracy tensor; accepted for interface compatibility, unused.
        pred: tensor yielding the predicted class index per example.
        tes_x: evaluation inputs fed to the ``inputs`` placeholder (the
            misspelled name is kept so existing callers keep working).
        test_y: one-hot labels; their argmax along axis 1 is the true class.
    """
    import numpy as np

    # Use the argument that was passed in, not the module-level test_x, and
    # disable dropout (keep_prob=1.0) for evaluation.
    y_pred = sess.run(pred,feed_dict={inputs:tes_x,labels:test_y,keep_prob:1.0})
    # Computed with NumPy so that repeated calls do not keep adding a new
    # tf.argmax op to the graph.
    y_true = np.argmax(test_y,1)
    print(metrics.classification_report(y_true,y_pred))


for epoach in range(1000):
    # NOTE(review): `iter` shadows the builtin; left unchanged because code
    # beyond this excerpt may reference it.
    iter = 0
    # Evaluate on the test set before each epoch's training pass.
    test(sess,acc,pred,test_x,test_y)
    batchs = data_helper.get_batch(64,train_x,train_y,all_len)
    for [batch_x,batch_y,batch_len] in batchs:
        # One optimisation step with dropout keep probability 0.5.
        _,loss_,acc_,pred_list = sess.run([train_op,loss,acc,pred],feed_dict={inputs:batch_x, labels:batch_y,keep_prob:0.5})
# Log to ./run.log, truncating the file on each run (mode="w"); INFO and above.
# `logger` is defined before this excerpt.
fh = logging.FileHandler("./run.log", mode="w")
fh.setLevel(logging.INFO)
fmt = "%(asctime)-15s %(levelname)s %(filename)s %(lineno)d %(process)d %(message)s"
# Timestamp layout for log records, e.g. "Mon 01 Jan 2018 12:00:00".
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)
fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
# Embedding table plus the two vocabulary mappings returned by load_embedding.
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file, FLAGS.embedding_size)
# Train / test / validation sets, each loaded with the question and answer
# length flags as the last two arguments.
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)
test_ori_quests, test_cand_quests, labels, results = load_test_data(FLAGS.test_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)
valid_ori_quests, valid_cand_quests, valid_labels, valid_results = load_test_data(FLAGS.valid_file, word2idx, FLAGS.quest_len, FLAGS.answer_len)
#----------------------------------- load data end ----------------------

#----------------------------------- execute train model ---------------------------------
# NOTE: only the beginning of run_step is visible in this excerpt.
def run_step(sess, ori_batch, cand_batch, neg_batch, lstm, dropout=1.):
    start_time = time.time()
    # Feed the three batches and the dropout keep probability to the model's
    # placeholders.
    feed_dict = {
        lstm.ori_input_quests : ori_batch,
        lstm.cand_input_quests : cand_batch,
        lstm.neg_input_quests : neg_batch,
        lstm.keep_prob : dropout
    }

    # `train_op` and `global_step` are module-level names defined outside this
    # excerpt; running train_op performs the optimisation step.
    _, step, ori_cand_score, ori_neg_score, cur_loss, cur_acc = sess.run([train_op, global_step, lstm.ori_cand, lstm.ori_neg, lstm.loss, lstm.acc], feed_dict)
    time_str = datetime.datetime.now().isoformat()
# Attach the formatter to the file handler and register the handler.
# (`fh`, `formatter` and `logger` are defined before this excerpt.)
fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
embedding, word2idx, idx2word = load_embedding(
    FLAGS.embedding_file,
    FLAGS.embedding_size,
)

# Training data.
(ori_quests,
 cand_quests,
 neg_quests,
 cat_ids) = load_train_data(FLAGS.train_file, word2idx, FLAGS.sequence_len)
#train_quests, valid_quests = create_valid(zip(ori_quests, cand_quests))

# Test data.
(test_ori_quests,
 test_cand_quests,
 labels,
 results,
 test_cat_ids) = load_test_data(FLAGS.test_file, word2idx, FLAGS.sequence_len)
# for_test_ori_quests, for_test_cand_quests, for_labels, for_results , for_test_cat_ids = load_test_data(FLAGS.train_for_test, word2idx, FLAGS.sequence_len)
# test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_file_like_train, word2idx, FLAGS.sequence_len)

# Second evaluation set, read from FLAGS.train_LONG with the test loader.
(for_test_ori_quests,
 for_test_cand_quests,
 for_labels,
 for_results,
 for_test_cat_ids) = load_test_data(FLAGS.train_LONG, word2idx, FLAGS.sequence_len)
#test_like_train_ori_quests, test_like_train_cand_quests,test_like_train_neg_quests,test_like_train_cat_ids = load_train_data(FLAGS.test_SHORT, word2idx, FLAGS.sequence_len)
#----------------------------------- load data end ----------------------

#----------------------------------- build model --------------------------------------
# Turn the comma-separated FLAGS.filter_sizes string into a list of ints.
filter_sizes = []
for raw_size in FLAGS.filter_sizes.strip().split(","):
    filter_sizes.append(int(raw_size.strip()))
tf.flags.DEFINE_string("test_file", "twitter-datasets/test_data.txt", "Path and name of test file")
# The default submission name is suffixed with the current Unix time in whole
# seconds, evaluated once when this line runs.
tf.flags.DEFINE_string("submission_filename", "submission_predictions" + str(int(time.time())), "Path and name of submission file")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
# NOTE(review): `_parse_flags()` and `__flags` are private attributes of the
# flags object and are not available in every TensorFlow release -- confirm
# against the TensorFlow version this project pins.
FLAGS._parse_flags()
# Echo every flag as NAME=value, sorted by flag name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

test_data = data_helper.load_test_data(FLAGS.test_file)

# Map data into vocabulary
# The processor is restored from ./vocabulary/vocab and applied to the test data.
vocab_path = os.path.join(os.path.curdir, "vocabulary", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
x_test = np.array(list(vocab_processor.transform(test_data)))

print("\nEvaluating...\n")

# Evaluation
# ==================================================
# Most recent checkpoint under FLAGS.checkpoint_dir (flag defined before this excerpt).
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    # NOTE: the ConfigProto call continues beyond this excerpt.
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
# Log record layout: timestamp, level, source file, line number, process id, message.
fmt = "%(asctime)-15s %(levelname)s %(filename)s %(lineno)d %(process)d %(message)s"
# Timestamp layout for log records, e.g. "Mon 01 Jan 2018 12:00:00".
datefmt = "%a %d %b %Y %H:%M:%S"
formatter = logging.Formatter(fmt, datefmt)
# `fh` and `logger` are defined before this excerpt.
fh.setFormatter(formatter)
logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------load data -------------------------------
# Embedding table plus the two vocabulary mappings returned by load_embedding.
embedding, word2idx, idx2word = load_embedding(FLAGS.embedding_file, FLAGS.embedding_size)
# Train / test / validation sets, each loaded with FLAGS.num_unroll_steps as
# the third argument.
ori_quests, cand_quests = load_train_data(FLAGS.train_file, word2idx, FLAGS.num_unroll_steps)
test_ori_quests, test_cand_quests, labels, results = load_test_data(
    FLAGS.test_file, word2idx, FLAGS.num_unroll_steps)
valid_ori_quests, valid_cand_quests, valid_labels, valid_results = load_test_data(
    FLAGS.valid_file, word2idx, FLAGS.num_unroll_steps)
#----------------------------------- load data end ----------------------

#----------------------------------- execute train model ---------------------------------
# NOTE: only the beginning of run_step is visible in this excerpt.
def run_step(sess, ori_batch, cand_batch, neg_batch, lstm, dropout=1.):
    start_time = time.time()
    # Feed the three batches and the dropout keep probability to the model's
    # placeholders.
    feed_dict = {
        lstm.ori_input_quests: ori_batch,
        lstm.cand_input_quests: cand_batch,
        lstm.neg_input_quests: neg_batch,
        lstm.keep_prob: dropout
    }
}) return prediction def prob(self, x): prob = self._sess.run([self._prob], feed_dict={ self._input_x: x, self._drop_keep_prob: 1 }) return prob def score(self, x, label): acc = self._sess.run([self._acc], feed_dict={ self._input_x: x, self._input_y: label, self._drop_keep_prob: 1 }) return acc if __name__ == "main": x_text = load_test_data('twitter-datasets/test_data.txt') vocab = learn.preprocessing.VocabularyProcessor.restore('vocabulary/vocab') x_data = np.array(list(vocab.transform(x_text))) ckpt_path = 'run/1526114630/checkpoint/model-3000.meta' tmodel = test_model(ckpt_path=ckpt_path) result = tmodel.predict(x_data) create_submission_file(result[0], 'submission.txt')
# Remaining layers, appended in order: the tail of the previous block
# (RELU => POOL), then the fully connected head and the softmax classifier.
for next_layer in (
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        # set of FC => RELU layers
        Flatten(),
        Dense(500),
        Activation("relu"),
        # softmax classifier
        Dense(nb_classes),
        Activation("softmax"),
):
    model.add(next_layer)

# Plain SGD on categorical cross-entropy, tracking accuracy.
sgd = SGD(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

# Train, using the held-out split for validation after each epoch.
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          verbose=1,
          validation_data=(X_test, Y_test))

# evaluate() returns [loss, accuracy] for the metrics compiled above.
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

# Prepare the submission images the same way: one channel, float32, scaled by 255.
X_val = load_test_data("test.csv")
n_val = X_val.shape[0]
X_val = X_val.reshape(n_val, 1, img_rows, img_cols).astype('float32')
X_val /= 255
print(X_val.shape, 'Validation test samples')

# Predict class labels and write them to the submission file.
preds = model.predict_classes(X_val, batch_size=batch_size, verbose=1)
save_preds(preds, "lenet_submission.csv")
"Path and name of submission file") # Misc Parameters tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") test_data = data_helper.load_test_data(FLAGS.test_file) # Map data into vocabulary vocab_path = os.path.join(os.path.curdir, "vocabulary", "vocab") vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path) x_test = np.array(list(vocab_processor.transform(test_data))) print("\nEvaluating...\n") # Evaluation # ================================================== checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement,
import tensorflow as tf
import numpy as np
import data_helper
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as birnn

# Hyper-parameters used to build the graph below.
voc_size = 10000
batch_size = 64
seqlen = 35
learn_rate = 0.05
n_class = 2
embedding_size = 100

# NOTE(review): the literals 10000 and 35 repeat voc_size and seqlen above --
# presumably they are meant to stay in sync; confirm.
train_x, train_y, words_dict, labels_dict, seqlen_all = data_helper.load(
    "train.txt", 10000, 35)
one_hot_label = tf.one_hot(train_y, n_class)
test_x, test_y, seqlen_test = data_helper.load_test_data(
    "test_filter_2.txt", seqlen, words_dict, labels_dict)
# seqlen_all = np.array(seqlen_all)*10

# Placeholders: integer inputs of width `seqlen`, targets of width 2, and a
# 1-D tensor that is passed to the RNN as `sequence_length`.
inputs = tf.placeholder(tf.int64, [None, seqlen], name="seq_inputs")
outputs = tf.placeholder(tf.int64, [None, 2], name="outputs")
seqlen_hdr = tf.placeholder(tf.int64, [None])

# Embedding matrix with random-uniform initial values, looked up by `inputs`.
W_embedding = tf.Variable(tf.random_uniform(shape=[voc_size, embedding_size]))
embedding = tf.nn.embedding_lookup(W_embedding, inputs)
# print("embding",embedding) #embedding shape(35,100)

# Forward and backward LSTM cells, each with `embedding_size` units.
fwcell = LSTMCell(embedding_size)
bwcell = LSTMCell(embedding_size)

# seqlen here should have one entry per example in the batch (length
# batch_size) and should be a tensor.
# NOTE: the birnn call continues beyond this excerpt.
out_bilstm, final_state = birnn(fwcell,
                                bwcell,
                                inputs=embedding,
                                sequence_length=seqlen_hdr,