def create_model_1(self, inputs):
    """Sentence-level Bi-LSTM with a sentence classifier, followed by
    length-aware mean pooling over sentences for the document classifier."""
    inputs = tf.reshape(
        inputs, [-1, self.config.max_sentence_len, self.config.embedding_dim])
    # outputs_sen = self.add_cnn_layer(inputs)
    # outputs_sen_dim = self.filter_num * len(self.filter_list)
    outputs_sen = self.add_bilstm_layer(inputs, self.sen_len,
                                        self.config.max_sentence_len,
                                        'doc_sen')  # doc_sen / sen
    outputs_sen_dim = 2 * self.config.n_hidden
    sen_logits = softmax_layer(outputs_sen, outputs_sen_dim,
                               self.config.random_base, self.keep_prob2,
                               self.config.l2_reg, self.config.n_sen_class,
                               'sen_softmax_')
    outputs_sen = tf.reshape(
        outputs_sen, [-1, self.config.max_doc_len, outputs_sen_dim])
    outputs_doc = reduce_mean_with_len(outputs_sen, self.doc_len)
    logits = softmax_layer(outputs_doc, outputs_sen_dim,
                           self.config.random_base, self.keep_prob2,
                           self.config.l2_reg, self.config.n_class,
                           'doc_softmax_')
    return sen_logits, logits

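# reduce_mean_with_len is used above but defined elsewhere in the repo. A
# minimal sketch of the assumed behaviour -- mean over the valid (unpadded)
# time steps of each sequence -- is given below; the name
# reduce_mean_with_len_sketch and the epsilon guard are illustrative, not the
# repo's exact code.
def reduce_mean_with_len_sketch(inputs, length):
    """inputs: [batch, max_len, dim]; length: [batch] true sequence lengths."""
    length = tf.reshape(tf.cast(length, tf.float32), [-1, 1])
    # sum over time, divide by the true length (guard against length 0)
    return tf.reduce_sum(inputs, 1) / (length + 1e-9)  # [batch, dim]
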
def hn_att(inputs, sen_len, doc_len, keep_prob1, keep_prob2):
    """Hierarchical attention network: word-level Bi-LSTM + attention per
    sentence, then sentence-level Bi-LSTM + attention over the document."""
    print 'I am HAN!'
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    sen_len = tf.reshape(sen_len, [-1])
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len, 'sentence', 'all')
    alpha_sen = mlp_attention_layer(hiddens_sen, sen_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, '1')
    outputs_sen = tf.reshape(tf.matmul(alpha_sen, hiddens_sen),
                             [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    # sen_len = tf.reshape(sen_len, [-1, FLAGS.max_doc_len])
    # alpha = 1.0 - tf.cast(tf.reshape(sen_len / (tf.reduce_sum(sen_len, 1, keep_dims=True) + 1),
    #                                  [-1, FLAGS.max_doc_len, 1]), tf.float32)
    # outputs_new = alpha * outputs_sen
    hiddens_doc = bi_dynamic_rnn(cell, outputs_sen, FLAGS.n_hidden, doc_len,
                                 FLAGS.max_doc_len, 'doc', 'all')
    alpha_doc = mlp_attention_layer(hiddens_doc, doc_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, '2')
    outputs_doc = tf.reshape(tf.matmul(alpha_doc, hiddens_doc),
                             [-1, 2 * FLAGS.n_hidden])
    prob = softmax_layer(outputs_doc, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return (prob,
            tf.reshape(alpha_sen,
                       [-1, FLAGS.max_doc_len, FLAGS.max_sentence_len]),
            tf.reshape(alpha_doc, [-1, FLAGS.max_doc_len]))

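# mlp_attention_layer is imported from elsewhere in the repo. A minimal sketch
# of the assumed interface: score each time step with a one-layer MLP, mask
# the padded positions, and softmax over time, so that
# tf.matmul(alpha, hiddens) pools the sequence. Variable names and the
# masking trick are assumptions, not the repo's exact code.
def mlp_attention_layer_sketch(inputs, length, n_hidden, l2_reg, random_base,
                               scope):
    """inputs: [batch, max_len, n_hidden] -> alpha: [batch, 1, max_len]."""
    max_len = int(inputs.get_shape()[1])
    w = tf.get_variable(
        'att_w_' + scope, [n_hidden, n_hidden],
        initializer=tf.random_uniform_initializer(-random_base, random_base),
        regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
    u = tf.get_variable(
        'att_u_' + scope, [n_hidden, 1],
        initializer=tf.random_uniform_initializer(-random_base, random_base))
    h = tf.tanh(tf.matmul(tf.reshape(inputs, [-1, n_hidden]), w))
    scores = tf.reshape(tf.matmul(h, u), [-1, 1, max_len])
    mask = tf.reshape(tf.sequence_mask(length, max_len, tf.float32),
                      [-1, 1, max_len])
    scores = scores + (mask - 1.0) * 1e9  # large negative score on padding
    return tf.nn.softmax(scores)
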
def ian(inputs, sen_len, target, sen_len_tr, keep_prob1, keep_prob2, _id='all'):
    """Interactive attention network: sentence and target attend to each
    other's mean-pooled Bi-LSTM states; the two attended vectors are
    concatenated for classification."""
    cell = tf.contrib.rnn.LSTMCell
    # sentence hidden
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens_s = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                               FLAGS.max_sentence_len, 'sen' + _id, 'all')
    pool_s = reduce_mean_with_len(hiddens_s, sen_len)
    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)
    # attention sentence; squeeze the attention axis explicitly so a batch
    # of size 1 is not squeezed away as well
    att_s = bilinear_attention_layer(hiddens_s, pool_t, sen_len,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'sen')
    outputs_s = tf.squeeze(tf.matmul(att_s, hiddens_s), [1])
    # attention target
    att_t = bilinear_attention_layer(hiddens_t, pool_s, sen_len_tr,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 't')
    outputs_t = tf.squeeze(tf.matmul(att_t, hiddens_t), [1])
    outputs = tf.concat([outputs_s, outputs_t], 1)
    prob = softmax_layer(outputs, 4 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob, att_s, att_t

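# bilinear_attention_layer is also imported from elsewhere. A minimal sketch
# of the assumed interface: score the hidden states against a query vector
# through a bilinear form, mask padding, and softmax over time. All names
# here are illustrative assumptions.
def bilinear_attention_layer_sketch(inputs, query, length, n_hidden, l2_reg,
                                    random_base, scope):
    """inputs: [batch, max_len, n_hidden]; query: [batch, n_hidden]
    -> alpha: [batch, 1, max_len]."""
    max_len = int(inputs.get_shape()[1])
    w = tf.get_variable(
        'bl_w_' + scope, [n_hidden, n_hidden],
        initializer=tf.random_uniform_initializer(-random_base, random_base),
        regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
    query = tf.expand_dims(tf.matmul(query, w), -1)  # [batch, n_hidden, 1]
    scores = tf.reshape(tf.matmul(inputs, query), [-1, 1, max_len])
    mask = tf.reshape(tf.sequence_mask(length, max_len, tf.float32),
                      [-1, 1, max_len])
    scores = scores + (mask - 1.0) * 1e9  # mask out the padded positions
    return tf.nn.softmax(scores)
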
def hn_inter_att(inputs_o, sen_len_o, doc_len_o, inputs_r, sen_len_r,
                 doc_len_r, keep_prob1, keep_prob2, _id='0'):
    """Hierarchical network with inter-attention: the original and reverse
    documents are encoded separately, then fused by document-level attention
    over their concatenated sentence representations."""
    print 'I am hn-inter-att!'
    cell = tf.contrib.rnn.LSTMCell
    # Original Part
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    sen_len_o = tf.reshape(sen_len_o, [-1])
    hiddens_sen_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o,
                                   FLAGS.max_sentence_len, 'sentence_o', 'all')
    alpha_sen_o = mlp_attention_layer(hiddens_sen_o, sen_len_o,
                                      2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                      FLAGS.random_base, 'sentence_o')
    outputs_sen_o = tf.reshape(tf.matmul(alpha_sen_o, hiddens_sen_o),
                               [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hiddens_doc_o = bi_dynamic_rnn(cell, outputs_sen_o, FLAGS.n_hidden,
                                   doc_len_o, FLAGS.max_doc_len, 'doc_o', 'all')
    # Reverse Part
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    sen_len_r = tf.reshape(sen_len_r, [-1])
    hiddens_sen_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r,
                                   FLAGS.max_sentence_len, 'sentence_r', 'all')
    alpha_sen_r = mlp_attention_layer(hiddens_sen_r, sen_len_r,
                                      2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                      FLAGS.random_base, 'sentence_r')
    outputs_sen_r = tf.reshape(tf.matmul(alpha_sen_r, hiddens_sen_r),
                               [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hiddens_doc_r = bi_dynamic_rnn(cell, outputs_sen_r, FLAGS.n_hidden,
                                   doc_len_r, FLAGS.max_doc_len, 'doc_r', 'all')
    # Combined Part
    hiddens_doc = tf.concat([hiddens_doc_o, hiddens_doc_r], 2)  # batch_size * max_doc_len * 4n_hidden
    alpha_doc = mlp_attention_layer(hiddens_doc, doc_len_o, 4 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, 'doc')
    outputs_doc = tf.reshape(tf.matmul(alpha_doc, hiddens_doc),
                             [-1, 4 * FLAGS.n_hidden])
    logits = softmax_layer(outputs_doc, 4 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits

def ian(inputs_l, len_l, inputs_r, len_r, keep_prob1, keep_prob2, _id='1'):
    """Interactive attention over left and right contexts (unidirectional
    LSTM variant). If both definitions live in one module, this shadows the
    sentence/target ian defined above."""
    cell = tf.contrib.rnn.LSTMCell
    # left hidden
    inputs_l = tf.nn.dropout(inputs_l, keep_prob=keep_prob1)
    hiddens_l = dynamic_rnn(cell, inputs_l, FLAGS.n_hidden, len_l,
                            FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = tf.reduce_mean(hiddens_l, 1, keep_dims=False)
    # right hidden
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, len_r,
                            FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = tf.reduce_mean(hiddens_r, 1, keep_dims=False)
    # attention left
    att_l = bilinear_attention_layer(hiddens_l, pool_r, len_l, FLAGS.n_hidden,
                                     FLAGS.l2_reg, FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_l, hiddens_l), [1])
    # attention right
    att_r = bilinear_attention_layer(hiddens_r, pool_l, len_r, FLAGS.n_hidden,
                                     FLAGS.l2_reg, FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_r, hiddens_r), [1])
    outputs = tf.concat([outputs_l, outputs_r], 1)
    prob = softmax_layer(outputs, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob

def lstm_sen(inputs, sen_len, keep_prob1, keep_prob2):
    """Sentence-level Bi-LSTM classifier on the last hidden state."""
    print 'I am lstm-sen!'
    cell = tf.contrib.rnn.LSTMCell
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence_o', 'last')
    logits = softmax_layer(hiddens, 2 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits

def bi_rnn(inputs, sen_len, keep_prob1, keep_prob2, _id='1'):
    print 'I am bi-rnn.'
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence' + _id, FLAGS.t1)
    return softmax_layer(hiddens, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class, _id)

def bi_rnn_att(inputs, sen_len, keep_prob1, keep_prob2, _id='1'):
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len, _id, 'all')
    alpha_sen = mlp_attention_layer(hiddens_sen, sen_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, _id)
    outputs_sen = tf.squeeze(tf.matmul(alpha_sen, hiddens_sen), [1])
    return softmax_layer(outputs_sen, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class, _id)

def lstm_att_sen(inputs, sen_len, keep_prob1, keep_prob2):
    print 'I am lstm-att-sen!'
    cell = tf.contrib.rnn.LSTMCell
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence_o', 'all')
    alpha = mlp_attention_layer(hiddens, sen_len, 2 * FLAGS.n_hidden,
                                FLAGS.l2_reg, FLAGS.random_base, 'sen')
    outputs = tf.reshape(tf.matmul(alpha, hiddens), [-1, 2 * FLAGS.n_hidden])
    logits = softmax_layer(outputs, 2 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits

def dual_sen(inputs_o, sen_len_o, inputs_r, sen_len_r, keep_prob1, keep_prob2):
    """Dual-view sentence classifier: concatenates the last Bi-LSTM states of
    the original and reverse inputs."""
    print 'I am dual-sen!'
    cell = tf.contrib.rnn.LSTMCell
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    hiddens_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o,
                               FLAGS.max_sentence_len, 'sentence_o', 'last')
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r,
                               FLAGS.max_sentence_len, 'sentence_r', 'last')
    hiddens = tf.concat([hiddens_o, hiddens_r], 1)
    logits = softmax_layer(hiddens, 4 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits

def hn(inputs, sen_len, doc_len, keep_prob1, keep_prob2, id_=1):
    """Hierarchical network without attention: a sentence Bi-LSTM feeds a
    document Bi-LSTM; FLAGS.t1 / FLAGS.t2 select the 'last' or 'all' output
    mode of each level."""
    print 'I am HN!'
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    sen_len = tf.reshape(sen_len, [-1])
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len,
                                 'sentence' + str(id_), FLAGS.t1)
    hiddens_sen = tf.reshape(hiddens_sen,
                             [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hidden_doc = bi_dynamic_rnn(cell, hiddens_sen, FLAGS.n_hidden, doc_len,
                                FLAGS.max_doc_len, 'doc' + str(id_), FLAGS.t2)
    return softmax_layer(hidden_doc, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)

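# bi_dynamic_rnn is the repo's shared Bi-LSTM wrapper, defined elsewhere. A
# minimal sketch of the assumed contract, including the 'all' / 'last' output
# switch used throughout this file; the gather trick for 'last' is an
# assumption, not necessarily the repo's exact code.
def bi_dynamic_rnn_sketch(cell, inputs, n_hidden, length, max_len, scope_name,
                          out_type='all'):
    """'all' -> [batch, max_len, 2*n_hidden]; 'last' -> [batch, 2*n_hidden]."""
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell(n_hidden), cell(n_hidden), inputs,
        sequence_length=length, dtype=tf.float32, scope=scope_name)
    outputs = tf.concat(outputs, 2)  # concat forward/backward states
    if out_type == 'all':
        return outputs
    # 'last': pick the hidden state at each sequence's final valid time step
    batch_size = tf.shape(outputs)[0]
    index = tf.range(batch_size) * max_len + (length - 1)
    return tf.gather(tf.reshape(outputs, [-1, 2 * n_hidden]), index)
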
def create_model_2(self, inputs):
    inputs = tf.reshape(
        inputs, [-1, self.config.max_sentence_len, self.config.embedding_dim])
    outputs_sen = self.add_bilstm_layer(inputs, self.sen_len,
                                        self.config.max_sentence_len, 'sen')
    outputs_sen = tf.reshape(
        outputs_sen, [-1, self.config.max_doc_len, 2 * self.config.n_hidden])
    outputs_doc = self.add_bilstm_layer(outputs_sen, self.doc_len,
                                        self.config.max_doc_len, 'doc')
    return softmax_layer(outputs_doc, 2 * self.config.n_hidden,
                         self.config.random_base, self.keep_prob2,
                         self.config.l2_reg, self.config.n_class,
                         'doc_softmax')

def dual_att_sen(inputs_o, sen_len_o, inputs_r, sen_len_r, keep_prob1,
                 keep_prob2):
    """Dual-view sentence classifier with attention over the concatenated
    original and reverse Bi-LSTM states."""
    print 'I am dual-att-sen!'
    cell = tf.contrib.rnn.LSTMCell
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    hiddens_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o,
                               FLAGS.max_sentence_len, 'sentence_o', 'all')
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r,
                               FLAGS.max_sentence_len, 'sentence_r', 'all')
    hiddens = tf.concat([hiddens_o, hiddens_r], 2)
    alpha = mlp_attention_layer(hiddens, sen_len_o, 4 * FLAGS.n_hidden,
                                FLAGS.l2_reg, FLAGS.random_base, 'sen')
    outputs = tf.reshape(tf.matmul(alpha, hiddens), [-1, 4 * FLAGS.n_hidden])
    logits = softmax_layer(outputs, 4 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits

def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
            keep_prob1, keep_prob2, _id='all'):
    """Left-Center-Right separated network with rotatory attention: the target
    first attends to the pooled left/right contexts, then the attended target
    vectors drive attention back over the left and right hidden states."""
    print('I am lcr_rot.')
    cell = tf.contrib.rnn.LSTMCell
    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw,
                               FLAGS.max_sentence_len, 'l' + _id, 'all')
    # hiddens_l = dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw, FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)
    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw,
                               FLAGS.max_sentence_len, 'r' + _id, 'all')
    # hiddens_r = dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw, FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)
    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    # hiddens_t = dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr, FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)
    # pool_t = tf.concat(1, [target, target])
    # attention target
    att_t_l = bilinear_attention_layer(hiddens_t, pool_l, sen_len_tr,
                                       2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                       FLAGS.random_base, 'tl')
    outputs_t_l = tf.squeeze(tf.matmul(att_t_l, hiddens_t), [1])
    att_t_r = bilinear_attention_layer(hiddens_t, pool_r, sen_len_tr,
                                       2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                       FLAGS.random_base, 'tr')
    outputs_t_r = tf.squeeze(tf.matmul(att_t_r, hiddens_t), [1])
    # attention left
    att_l = bilinear_attention_layer(hiddens_l, outputs_t_l, sen_len_fw,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_l, hiddens_l), [1])
    # attention right
    att_r = bilinear_attention_layer(hiddens_r, outputs_t_r, sen_len_bw,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_r, hiddens_r), [1])
    outputs = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    # outputs = (outputs_l + outputs_r + outputs_t_l + outputs_t_r) / 4.0
    prob = softmax_layer(outputs, 8 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r

def create_model(self, inputs):
    inputs = tf.reshape(
        inputs, [-1, self.config.max_sentence_len, self.config.embedding_dim])
    inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    outputs_dim = 2 * self.config.n_hidden
    outputs_sen = bi_dynamic_rnn(cell, inputs, self.config.n_hidden,
                                 tf.reshape(self.sen_len, [-1]),
                                 self.config.max_sentence_len, 'sen', 'last')
    outputs_sen = tf.reshape(outputs_sen,
                             [-1, self.config.max_doc_len, outputs_dim])
    outputs_doc = bi_dynamic_rnn(cell, outputs_sen, self.config.n_hidden,
                                 self.doc_len, self.config.max_doc_len,
                                 'doc', 'last')
    doc_logits = softmax_layer(outputs_doc, outputs_dim,
                               self.config.random_base, self.keep_prob2,
                               self.config.l2_reg, self.config.n_class, 'doc')
    return doc_logits

def add_softmax_layer(self, inputs):
    inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob2)
    return softmax_layer(inputs, self.filter_num * len(self.filter_list),
                         self.config.random_base, self.keep_prob2,
                         self.config.l2_reg, self.config.n_class)

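# softmax_layer is the shared classifier head, defined elsewhere in the repo.
# A minimal sketch of the assumed behaviour: dropout followed by an affine
# projection to n_class scores. Whether the repo's version also applies
# tf.nn.softmax is not visible here; main() below feeds its output to
# tf.nn.softmax_cross_entropy_with_logits, so this sketch returns raw logits.
def softmax_layer_sketch(inputs, n_hidden, random_base, keep_prob, l2_reg,
                         n_class, scope=''):
    w = tf.get_variable(
        scope + 'softmax_w', [n_hidden, n_class],
        initializer=tf.random_uniform_initializer(-random_base, random_base),
        regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
    b = tf.get_variable(scope + 'softmax_b', [n_class],
                        initializer=tf.zeros_initializer())
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)
    return tf.matmul(inputs, w) + b
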
def main(_):
    word_id_mapping_o, w2v_o = load_w2v(FLAGS.embedding_file,
                                        FLAGS.embedding_dim, True)
    # word_id_mapping_o, w2v_o = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file, FLAGS.embedding_dim, True)
    word_embedding_o = tf.constant(w2v_o, dtype=tf.float32)
    # word_id_mapping_r, w2v_r = load_w2v(FLAGS.embedding_file_r, FLAGS.embedding_dim, True)
    # word_id_mapping_r, w2v_r = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file_r, FLAGS.embedding_dim, True)
    word_id_mapping_r = word_id_mapping_o
    word_embedding_r = tf.constant(w2v_o, dtype=tf.float32)

    with tf.name_scope('inputs'):
        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        x_o = tf.placeholder(tf.int32,
                             [None, FLAGS.max_doc_len, FLAGS.max_sentence_len])
        x_r = tf.placeholder(tf.int32,
                             [None, FLAGS.max_doc_len, FLAGS.max_sentence_len])
        sen_len_o = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len])
        sen_len_r = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len])
        doc_len_o = tf.placeholder(tf.int32, None)
        doc_len_r = tf.placeholder(tf.int32, None)
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])

    # NOTE: the hn / hn_att called here are assumed to take a scope id and
    # return the 2 * n_hidden document representation; the variants defined
    # above return logits (and, for hn_att, attention weights) directly.
    with tf.device('/gpu:0'):
        inputs_o = tf.nn.embedding_lookup(word_embedding_o, x_o)
        inputs_o = tf.reshape(
            inputs_o, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim])
        if FLAGS.method == 'ATT':
            h_o = hn_att(inputs_o, sen_len_o, doc_len_o, keep_prob1,
                         keep_prob2, 'o')
        else:
            h_o = hn(inputs_o, sen_len_o, doc_len_o, keep_prob1, keep_prob2,
                     'o')
        prob_o = softmax_layer(h_o, 2 * FLAGS.n_hidden, FLAGS.random_base,
                               keep_prob2, FLAGS.l2_reg, FLAGS.n_class, 'o')
    with tf.device('/gpu:1'):
        inputs_r = tf.nn.embedding_lookup(word_embedding_r, x_r)
        inputs_r = tf.reshape(
            inputs_r, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim])
        if FLAGS.method == 'ATT':
            h_r = hn_att(inputs_r, sen_len_r, doc_len_r, keep_prob1,
                         keep_prob2, 'r')
        else:
            h_r = hn(inputs_r, sen_len_r, doc_len_r, keep_prob1, keep_prob2,
                     'r')
        prob_r = softmax_layer(h_r, 2 * FLAGS.n_hidden, FLAGS.random_base,
                               keep_prob2, FLAGS.l2_reg, FLAGS.n_class, 'r')

    with tf.name_scope('loss'):
        # reversed labels; only used by the commented-out log-loss below
        r_y = tf.reverse(y, [1])
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # loss = - tf.reduce_mean(y * tf.log(prob_o)) - tf.reduce_mean(r_y * tf.log(prob_r)) + sum(reg_loss)
        # prob = FLAGS.alpha * prob_o + (1.0 - FLAGS.alpha) * tf.reverse(prob_r, [False, True])
        loss_o = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob_o, labels=y))
        loss_r = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob_r, labels=y))
        loss = loss_o + loss_r + tf.add_n(reg_loss)
        # the reverse view predicts classes in reversed order, so flip it back
        prob = FLAGS.alpha * prob_o + (1.0 - FLAGS.alpha) * tf.reverse(
            prob_r, [1])

    all_vars = tf.trainable_variables()
    with tf.name_scope('train'):
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        grads, global_norm = tf.clip_by_global_norm(
            tf.gradients(loss, all_vars), 5.0)
        train_op = optimizer.apply_gradients(zip(grads, all_vars),
                                             name='train_op',
                                             global_step=global_step)

    with tf.name_scope('predict'):
        cor_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
        acc_prob = tf.reduce_mean(tf.cast(cor_pred, tf.float32))
        acc_num = tf.reduce_sum(tf.cast(cor_pred, tf.int32))
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

    title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
        FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
        FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)
    conf = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=conf) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, \
            train_summary_writer, test_summary_writer, \
            validate_summary_writer = summary_func(loss, acc_prob, test_loss,
                                                   test_acc, _dir, title, sess)
        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        saver = saver_func(save_dir)

        init = tf.global_variables_initializer()
        sess.run(init)
        # saver.restore(sess, '/-')

        tr_x, tr_y, tr_sen_len, tr_doc_len = load_inputs_document(
            FLAGS.train_file, word_id_mapping_o, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        te_x, te_y, te_sen_len, te_doc_len = load_inputs_document(
            FLAGS.test_file, word_id_mapping_o, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        tr_x_r, tr_y_r, tr_sen_len_r, tr_doc_len_r = load_inputs_document(
            FLAGS.train_file_r, word_id_mapping_r, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        te_x_r, te_y_r, te_sen_len_r, te_doc_len_r = load_inputs_document(
            FLAGS.test_file_r, word_id_mapping_r, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        # v_x, v_y, v_sen_len, v_doc_len = load_inputs_document(
        #     FLAGS.validate_file, word_id_mapping,
        #     FLAGS.max_sentence_len, FLAGS.max_doc_len)

        def get_batch_data(xo, slo, dlo, xr, slr, dlr, yy, batch_size, kp1,
                           kp2, is_shuffle=True):
            for index in batch_index(len(yy), batch_size, 1, is_shuffle):
                feed_dict = {
                    x_o: xo[index],
                    x_r: xr[index],
                    y: yy[index],
                    sen_len_o: slo[index],
                    sen_len_r: slr[index],
                    doc_len_o: dlo[index],
                    doc_len_r: dlr[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc, max_prob, step = 0., None, None
        max_ty, max_py = None, None
        for i in xrange(FLAGS.n_iter):
            for train, _ in get_batch_data(tr_x, tr_sen_len, tr_doc_len,
                                           tr_x_r, tr_sen_len_r, tr_doc_len_r,
                                           tr_y, FLAGS.batch_size,
                                           FLAGS.keep_prob1,
                                           FLAGS.keep_prob2):
                _, step, summary = sess.run(
                    [train_op, global_step, train_summary_op],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
            # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
            # sess.run(embed_update)

            acc, cost, cnt = 0., 0., 0
            p, ty, py = [], [], []
            for test, num in get_batch_data(te_x, te_sen_len, te_doc_len,
                                            te_x_r, te_sen_len_r,
                                            te_doc_len_r, te_y, 2000, 1.0,
                                            1.0, False):
                _loss, _acc, _p, _ty, _py = sess.run(
                    [loss, acc_num, prob, true_y, pred_y], feed_dict=test)
                p += list(_p)
                ty += list(_ty)
                py += list(_py)
                acc += _acc
                cost += _loss * num
                cnt += num
            print 'all samples={}, correct prediction={}'.format(cnt, acc)
            acc = acc / cnt
            cost = cost / cnt
            print 'Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                i, cost, acc)
            summary = sess.run(test_summary_op,
                               feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_prob = p
                max_ty = ty
                max_py = py
                # saver.save(sess, save_dir, global_step=step)

        print 'P:', precision_score(max_ty, max_py, average=None)
        print 'R:', recall_score(max_ty, max_py, average=None)
        print 'F:', f1_score(max_ty, max_py, average=None)

        with open(FLAGS.prob_file, 'w') as fp:
            for item in max_prob:
                fp.write(' '.join([str(it) for it in item]) + '\n')

        print 'Optimization Finished! Max acc={}'.format(max_acc)
        print 'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
            FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
            FLAGS.n_hidden, FLAGS.l2_reg)

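# batch_index (used by get_batch_data in main) is defined elsewhere in the
# repo. A minimal sketch of the assumed generator -- optionally shuffled
# numpy index slices over the dataset -- is given below; the name and the
# exact batching policy are assumptions.
import numpy as np

def batch_index_sketch(length, batch_size, n_iter=1, is_shuffle=True):
    index = np.arange(length)
    for _ in xrange(n_iter):
        if is_shuffle:
            np.random.shuffle(index)
        # ceil(length / batch_size) batches; the last one may be smaller
        for i in xrange((length + batch_size - 1) // batch_size):
            yield index[i * batch_size:(i + 1) * batch_size]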