Example #1
    def create_model_1(self, inputs):
        inputs = tf.reshape(
            inputs,
            [-1, self.config.max_sentence_len, self.config.embedding_dim])

        # outputs_sen = self.add_cnn_layer(inputs)
        # outputs_sen_dim = self.filter_num * len(self.filter_list)

        outputs_sen = self.add_bilstm_layer(inputs, self.sen_len,
                                            self.config.max_sentence_len,
                                            'doc_sen')  # doc_sen / sen
        outputs_sen_dim = 2 * self.config.n_hidden

        sen_logits = softmax_layer(outputs_sen, outputs_sen_dim,
                                   self.config.random_base, self.keep_prob2,
                                   self.config.l2_reg, self.config.n_sen_class,
                                   'sen_softmax_')
        outputs_sen = tf.reshape(
            outputs_sen, [-1, self.config.max_doc_len, outputs_sen_dim])
        outputs_doc = reduce_mean_with_len(outputs_sen, self.doc_len)

        logits = softmax_layer(outputs_doc, outputs_sen_dim,
                               self.config.random_base, self.keep_prob2,
                               self.config.l2_reg, self.config.n_class,
                               'doc_softmax_')
        return sen_logits, logits
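The examples in this listing all call a shared softmax_layer helper whose source is not shown here. Below is a minimal sketch of what such a helper could look like, assuming it is dropout followed by an L2-regularized affine projection and a softmax; the names and details are assumptions, and the real helper may return raw scores (logits) rather than probabilities.

import tensorflow as tf

def softmax_layer_sketch(inputs, n_in, random_base, keep_prob, l2_reg, n_class,
                         scope_name='softmax_'):
    # Hypothetical stand-in for the softmax_layer helper used in these examples:
    # dropout -> affine projection to n_class scores -> softmax over classes.
    with tf.variable_scope(scope_name + 'sketch'):
        w = tf.get_variable(
            'w', [n_in, n_class],
            initializer=tf.random_uniform_initializer(-random_base, random_base),
            regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
        b = tf.get_variable('b', [n_class], initializer=tf.zeros_initializer())
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)
    return tf.nn.softmax(tf.matmul(inputs, w) + b)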
Example #2
def hn_att(inputs, sen_len, doc_len, keep_prob1, keep_prob2):
    print('I am HAN!')
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    sen_len = tf.reshape(sen_len, [-1])
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len, 'sentence', 'all')
    alpha_sen = mlp_attention_layer(hiddens_sen, sen_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, '1')
    outputs_sen = tf.reshape(tf.matmul(alpha_sen, hiddens_sen),
                             [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])

    # sen_len = tf.reshape(sen_len, [-1, FLAGS.max_doc_len])
    # alpha = 1.0 - tf.cast(tf.reshape(sen_len / (tf.reduce_sum(sen_len, 1, keep_dims=True) + 1), [-1, FLAGS.max_doc_len, 1]), tf.float32)
    # outputs_new = alpha * outputs_sen

    hiddens_doc = bi_dynamic_rnn(cell, outputs_sen, FLAGS.n_hidden, doc_len,
                                 FLAGS.max_doc_len, 'doc', 'all')
    alpha_doc = mlp_attention_layer(hiddens_doc, doc_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, '2')
    outputs_doc = tf.reshape(tf.matmul(alpha_doc, hiddens_doc),
                             [-1, 2 * FLAGS.n_hidden])

    prob = softmax_layer(outputs_doc, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return (prob,
            tf.reshape(alpha_sen, [-1, FLAGS.max_doc_len, FLAGS.max_sentence_len]),
            tf.reshape(alpha_doc, [-1, FLAGS.max_doc_len]))
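mlp_attention_layer, used above, is another helper whose implementation is not part of these examples. Judging from how its output is fed to tf.matmul(alpha, hiddens), it presumably returns weights of shape [batch, 1, max_len]; the batch matmul then contracts the time axis, giving one 2 * n_hidden vector per sentence. A minimal sketch under that assumption (one-layer MLP scoring plus a length-masked softmax; all names hypothetical):

import tensorflow as tf

def mlp_attention_layer_sketch(inputs, lengths, n_dim, l2_reg, random_base, scope_id):
    # Hypothetical stand-in: score each time step with a small MLP, then take a
    # softmax over the valid steps only. inputs: [batch, max_len, n_dim].
    max_len = tf.shape(inputs)[1]
    with tf.variable_scope('mlp_att_sketch_' + scope_id):
        w = tf.get_variable(
            'w', [n_dim, n_dim],
            initializer=tf.random_uniform_initializer(-random_base, random_base),
            regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
        u = tf.get_variable(
            'u', [n_dim, 1],
            initializer=tf.random_uniform_initializer(-random_base, random_base))
    h = tf.tanh(tf.matmul(tf.reshape(inputs, [-1, n_dim]), w))
    scores = tf.reshape(tf.matmul(h, u), tf.shape(inputs)[:2])  # [batch, max_len]
    mask = tf.cast(tf.sequence_mask(tf.reshape(lengths, [-1]), max_len), tf.float32)
    scores = tf.exp(scores - tf.reduce_max(scores, 1, keep_dims=True)) * mask
    alpha = scores / (tf.reduce_sum(scores, 1, keep_dims=True) + 1e-9)
    return tf.expand_dims(alpha, 1)                             # [batch, 1, max_len]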
Example #3
def ian(inputs,
        sen_len,
        target,
        sen_len_tr,
        keep_prob1,
        keep_prob2,
        _id='all'):
    cell = tf.contrib.rnn.LSTMCell
    # sentence hidden
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens_s = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                               FLAGS.max_sentence_len, 'sen' + _id, 'all')
    pool_s = reduce_mean_with_len(hiddens_s, sen_len)
    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr,
                               FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)

    # attention sentence
    att_s = bilinear_attention_layer(hiddens_s, pool_t, sen_len,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'sen')
    outputs_s = tf.squeeze(tf.matmul(att_s, hiddens_s))
    # attention target
    att_t = bilinear_attention_layer(hiddens_t, pool_s, sen_len_tr,
                                     2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 't')
    outputs_t = tf.squeeze(tf.matmul(att_t, hiddens_t))

    outputs = tf.concat([outputs_s, outputs_t], 1)
    prob = softmax_layer(outputs, 4 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob, att_s, att_t
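reduce_mean_with_len (used for pool_s and pool_t above, and for the document vector in Example #1) is assumed to be a length-aware average over the time axis. A minimal sketch, assuming padded positions of the input are already zero:

import tensorflow as tf

def reduce_mean_with_len_sketch(inputs, length):
    # Hypothetical stand-in: sum over time and divide by the true sequence
    # length rather than max_len, so padding does not dilute the average.
    length = tf.cast(tf.reshape(length, [-1, 1]), tf.float32) + 1e-9
    return tf.reduce_sum(inputs, 1) / length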
Example #4
def hn_inter_att(inputs_o, sen_len_o, doc_len_o, inputs_r, sen_len_r, doc_len_r, keep_prob1, keep_prob2, _id='0'):
    print('I am hn-inter-att!')
    cell = tf.contrib.rnn.LSTMCell
    # Original Part
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    sen_len_o = tf.reshape(sen_len_o, [-1])
    hiddens_sen_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o, FLAGS.max_sentence_len, 'sentence_o', 'all')
    alpha_sen_o = mlp_attention_layer(hiddens_sen_o, sen_len_o, 2 * FLAGS.n_hidden, FLAGS.l2_reg, FLAGS.random_base, 'sentence_o')
    outputs_sen_o = tf.reshape(tf.matmul(alpha_sen_o, hiddens_sen_o), [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hiddens_doc_o = bi_dynamic_rnn(cell, outputs_sen_o, FLAGS.n_hidden, doc_len_o, FLAGS.max_doc_len, 'doc_o', 'all')

    # Reverse Part
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    sen_len_r = tf.reshape(sen_len_r, [-1])
    hiddens_sen_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r, FLAGS.max_sentence_len, 'sentence_r', 'all')
    alpha_sen_r = mlp_attention_layer(hiddens_sen_r, sen_len_r, 2 * FLAGS.n_hidden, FLAGS.l2_reg, FLAGS.random_base, 'sentence_r')
    outputs_sen_r = tf.reshape(tf.matmul(alpha_sen_r, hiddens_sen_r), [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hiddens_doc_r = bi_dynamic_rnn(cell, outputs_sen_r, FLAGS.n_hidden, doc_len_r, FLAGS.max_doc_len, 'doc_r', 'all')

    # Combined Part
    hiddens_doc = tf.concat([hiddens_doc_o, hiddens_doc_r], 2)  # batch_size * max_doc_len * 4n_hidden
    alpha_doc = mlp_attention_layer(hiddens_doc, doc_len_o, 4 * FLAGS.n_hidden, FLAGS.l2_reg, FLAGS.random_base, 'doc')
    outputs_doc = tf.reshape(tf.matmul(alpha_doc, hiddens_doc), [-1, 4 * FLAGS.n_hidden])
    logits = softmax_layer(outputs_doc, 4 * FLAGS.n_hidden, FLAGS.random_base, keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits
Example #5
def ian(inputs_l, len_l, inputs_r, len_r, keep_prob1, keep_prob2, _id='1'):
    cell = tf.contrib.rnn.LSTMCell
    # left hidden
    inputs_l = tf.nn.dropout(inputs_l, keep_prob=keep_prob1)
    hiddens_l = dynamic_rnn(cell, inputs_l, FLAGS.n_hidden, len_l,
                            FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = tf.reduce_mean(hiddens_l, 1, keep_dims=False)
    # right hidden
    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, len_r,
                            FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = tf.reduce_mean(hiddens_r, 1, keep_dims=False)
    # attention left
    att_l = bilinear_attention_layer(hiddens_l, pool_r, len_l, FLAGS.n_hidden,
                                     FLAGS.l2_reg, FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_l, hiddens_l))
    # attention right
    att_r = bilinear_attention_layer(hiddens_r, pool_l, len_r, FLAGS.n_hidden,
                                     FLAGS.l2_reg, FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_r, hiddens_r))

    outputs = tf.concat([outputs_l, outputs_r], 1)
    prob = softmax_layer(outputs, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob
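bilinear_attention_layer, used in the two IAN examples and in lcr_rot (Example #14 below), attends over one sequence conditioned on a pooled query vector from the other. A minimal sketch assuming a bilinear score h_i^T W q followed by a length-masked softmax, returning weights of shape [batch, 1, max_len] (all names hypothetical):

import tensorflow as tf

def bilinear_attention_layer_sketch(inputs, query, lengths, n_dim, l2_reg,
                                    random_base, scope_id):
    # Hypothetical stand-in: inputs [batch, max_len, n_dim], query [batch, n_dim].
    max_len = tf.shape(inputs)[1]
    with tf.variable_scope('bilinear_att_sketch_' + scope_id):
        w = tf.get_variable(
            'w', [n_dim, n_dim],
            initializer=tf.random_uniform_initializer(-random_base, random_base),
            regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
    q = tf.matmul(query, w)                                             # [batch, n_dim]
    scores = tf.squeeze(tf.matmul(inputs, tf.expand_dims(q, 2)), [2])   # [batch, max_len]
    mask = tf.cast(tf.sequence_mask(tf.reshape(lengths, [-1]), max_len), tf.float32)
    scores = tf.exp(scores - tf.reduce_max(scores, 1, keep_dims=True)) * mask
    alpha = scores / (tf.reduce_sum(scores, 1, keep_dims=True) + 1e-9)
    return tf.expand_dims(alpha, 1)                                     # [batch, 1, max_len]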
Example #6
def lstm_sen(inputs, sen_len, keep_prob1, keep_prob2):
    print('I am lstm-sen!')
    cell = tf.contrib.rnn.LSTMCell
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence_o', 'last')
    logits = softmax_layer(hiddens, 2 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits
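bi_dynamic_rnn, called with a final argument of 'all', 'last', FLAGS.t1 or FLAGS.t2 in these examples, is assumed to run a bidirectional RNN and either keep every time step or only the last valid one. A minimal sketch under that assumption (hypothetical, not the project's own helper):

import tensorflow as tf

def bi_dynamic_rnn_sketch(cell, inputs, n_hidden, length, max_len, scope_name,
                          out_type='last'):
    # Hypothetical stand-in: forward and backward outputs are concatenated to
    # 2 * n_hidden; 'all' returns [batch, max_len, 2 * n_hidden], anything else
    # returns each sequence's last valid step, [batch, 2 * n_hidden].
    length = tf.reshape(length, [-1])
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell(n_hidden), cell(n_hidden), inputs,
        sequence_length=length, dtype=tf.float32, scope=scope_name)
    outputs = tf.concat(outputs, 2)
    if out_type == 'all':
        return outputs
    index = tf.range(tf.shape(outputs)[0]) * max_len + tf.maximum(length - 1, 0)
    return tf.gather(tf.reshape(outputs, [-1, 2 * n_hidden]), index)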
Example #7
def bi_rnn(inputs, sen_len, keep_prob1, keep_prob2, _id='1'):
    print('I am bi-rnn.')
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence' + _id,
                             FLAGS.t1)
    return softmax_layer(hiddens, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class, _id)
Example #8
def bi_rnn_att(inputs, sen_len, keep_prob1, keep_prob2, _id='1'):
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len, _id, 'all')
    alpha_sen = mlp_attention_layer(hiddens_sen, sen_len, 2 * FLAGS.n_hidden,
                                    FLAGS.l2_reg, FLAGS.random_base, _id)
    outputs_sen = tf.squeeze(tf.matmul(alpha_sen, hiddens_sen))

    return softmax_layer(outputs_sen, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class, _id)
Example #9
def lstm_att_sen(inputs, sen_len, keep_prob1, keep_prob2):
    print('I am lstm-att-sen!')
    cell = tf.contrib.rnn.LSTMCell
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    hiddens = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                             FLAGS.max_sentence_len, 'sentence_o', 'all')

    alpha = mlp_attention_layer(hiddens, sen_len, 2 * FLAGS.n_hidden,
                                FLAGS.l2_reg, FLAGS.random_base, 'sen')
    outputs = tf.reshape(tf.matmul(alpha, hiddens), [-1, 2 * FLAGS.n_hidden])
    logits = softmax_layer(outputs, 2 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits
Example #10
def dual_sen(inputs_o, sen_len_o, inputs_r, sen_len_r, keep_prob1, keep_prob2):
    print('I am dual-sen!')
    cell = tf.contrib.rnn.LSTMCell
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    hiddens_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o,
                               FLAGS.max_sentence_len, 'sentence_o', 'last')

    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r,
                               FLAGS.max_sentence_len, 'sentence_r', 'last')

    hiddens = tf.concat([hiddens_o, hiddens_r], 1)
    logits = softmax_layer(hiddens, 4 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits
Example #11
def hn(inputs, sen_len, doc_len, keep_prob1, keep_prob2, id_=1):
    print('I am HN!')
    inputs = tf.nn.dropout(inputs, keep_prob=keep_prob1)
    cell = tf.contrib.rnn.LSTMCell
    sen_len = tf.reshape(sen_len, [-1])
    hiddens_sen = bi_dynamic_rnn(cell, inputs, FLAGS.n_hidden, sen_len,
                                 FLAGS.max_sentence_len, 'sentence' + str(id_),
                                 FLAGS.t1)
    hiddens_sen = tf.reshape(hiddens_sen,
                             [-1, FLAGS.max_doc_len, 2 * FLAGS.n_hidden])
    hidden_doc = bi_dynamic_rnn(cell, hiddens_sen, FLAGS.n_hidden, doc_len,
                                FLAGS.max_doc_len, 'doc' + str(id_), FLAGS.t2)

    return softmax_layer(hidden_doc, 2 * FLAGS.n_hidden, FLAGS.random_base,
                         keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
Example #12
    def create_model_2(self, inputs):
        inputs = tf.reshape(
            inputs,
            [-1, self.config.max_sentence_len, self.config.embedding_dim])
        outputs_sen = self.add_bilstm_layer(inputs, self.sen_len,
                                            self.config.max_sentence_len,
                                            'sen')
        outputs_sen = tf.reshape(
            outputs_sen,
            [-1, self.config.max_doc_len, 2 * self.config.n_hidden])
        outputs_doc = self.add_bilstm_layer(outputs_sen, self.doc_len,
                                            self.config.max_doc_len, 'doc')
        return softmax_layer(outputs_doc, 2 * self.config.n_hidden,
                             self.config.random_base, self.keep_prob2,
                             self.config.l2_reg, self.config.n_class,
                             'doc_softmax')
Example #13
def dual_att_sen(inputs_o, sen_len_o, inputs_r, sen_len_r, keep_prob1,
                 keep_prob2):
    print('I am dual-att-sen!')
    cell = tf.contrib.rnn.LSTMCell
    inputs_o = tf.nn.dropout(inputs_o, keep_prob=keep_prob1)
    hiddens_o = bi_dynamic_rnn(cell, inputs_o, FLAGS.n_hidden, sen_len_o,
                               FLAGS.max_sentence_len, 'sentence_o', 'all')

    inputs_r = tf.nn.dropout(inputs_r, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, inputs_r, FLAGS.n_hidden, sen_len_r,
                               FLAGS.max_sentence_len, 'sentence_r', 'all')

    hiddens = tf.concat([hiddens_o, hiddens_r], 2)
    alpha = mlp_attention_layer(hiddens, sen_len_o, 4 * FLAGS.n_hidden,
                                FLAGS.l2_reg, FLAGS.random_base, 'sen')
    outputs = tf.reshape(tf.matmul(alpha, hiddens), [-1, 4 * FLAGS.n_hidden])
    logits = softmax_layer(outputs, 4 * FLAGS.n_hidden, FLAGS.random_base,
                           keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return logits
Example #14
def lcr_rot(input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr, keep_prob1, keep_prob2, _id='all'):
    print('I am lcr_rot.')
    cell = tf.contrib.rnn.LSTMCell
    # left hidden
    input_fw = tf.nn.dropout(input_fw, keep_prob=keep_prob1)
    hiddens_l = bi_dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw, FLAGS.max_sentence_len, 'l' + _id, 'all')
    # hiddens_l = dynamic_rnn(cell, input_fw, FLAGS.n_hidden, sen_len_fw, FLAGS.max_sentence_len, 'l' + _id, 'all')
    pool_l = reduce_mean_with_len(hiddens_l, sen_len_fw)
    # right hidden
    input_bw = tf.nn.dropout(input_bw, keep_prob=keep_prob1)
    hiddens_r = bi_dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw, FLAGS.max_sentence_len, 'r' + _id, 'all')
    # hiddens_r = dynamic_rnn(cell, input_bw, FLAGS.n_hidden, sen_len_bw, FLAGS.max_sentence_len, 'r' + _id, 'all')
    pool_r = reduce_mean_with_len(hiddens_r, sen_len_bw)
    # target hidden
    target = tf.nn.dropout(target, keep_prob=keep_prob1)
    hiddens_t = bi_dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr, FLAGS.max_sentence_len, 't' + _id, 'all')
    # hiddens_t = dynamic_rnn(cell, target, FLAGS.n_hidden, sen_len_tr, FLAGS.max_sentence_len, 't' + _id, 'all')
    pool_t = reduce_mean_with_len(hiddens_t, sen_len_tr)
    # pool_t = tf.concat(1, [target, target])

    # attention target
    att_t_l = bilinear_attention_layer(hiddens_t, pool_l, sen_len_tr, 2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                       FLAGS.random_base, 'tl')
    # att_t_l = bilinear_attention_layer(hiddens_t, pool_l, sen_len_tr, 2 * FLAGS.n_hidden, FLAGS.l2_reg, FLAGS.random_base, 'tl')
    outputs_t_l = tf.squeeze(tf.matmul(att_t_l, hiddens_t))
    att_t_r = bilinear_attention_layer(hiddens_t, pool_r, sen_len_tr, 2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                       FLAGS.random_base, 'tr')
    # att_t_r = bilinear_attention_layer(hiddens_t, pool_r, sen_len_tr, 2 * FLAGS.n_hidden, FLAGS.l2_reg, FLAGS.random_base, 'tr')
    outputs_t_r = tf.squeeze(tf.matmul(att_t_r, hiddens_t))

    # attention left
    att_l = bilinear_attention_layer(hiddens_l, outputs_t_l, sen_len_fw, 2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'l')
    outputs_l = tf.squeeze(tf.matmul(att_l, hiddens_l))
    # attention right
    att_r = bilinear_attention_layer(hiddens_r, outputs_t_r, sen_len_bw, 2 * FLAGS.n_hidden, FLAGS.l2_reg,
                                     FLAGS.random_base, 'r')
    outputs_r = tf.squeeze(tf.matmul(att_r, hiddens_r))

    outputs = tf.concat([outputs_l, outputs_r, outputs_t_l, outputs_t_r], 1)
    # outputs = (outputs_l + outputs_r + outputs_t_l + outputs_t_r) / 4.0
    prob = softmax_layer(outputs, 8 * FLAGS.n_hidden, FLAGS.random_base, keep_prob2, FLAGS.l2_reg, FLAGS.n_class)
    return prob, att_l, att_r, att_t_l, att_t_r
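A hedged usage sketch for lcr_rot, mirroring the graph plumbing of Example #17: the placeholder names and the pre-loaded embedding matrix w2v are assumptions, not part of the original code. The concatenation of the four attended vectors (each 2 * FLAGS.n_hidden wide) is what explains the 8 * FLAGS.n_hidden input size passed to softmax_layer above.

import tensorflow as tf

with tf.name_scope('inputs'):
    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)
    x_fw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])   # left context ids
    x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])   # right context ids
    x_t = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])    # target phrase ids
    sen_len_fw = tf.placeholder(tf.int32, [None])
    sen_len_bw = tf.placeholder(tf.int32, [None])
    sen_len_tr = tf.placeholder(tf.int32, [None])

word_embedding = tf.constant(w2v, dtype=tf.float32)  # w2v: a pre-loaded [vocab, dim] matrix
input_fw = tf.nn.embedding_lookup(word_embedding, x_fw)
input_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
target = tf.nn.embedding_lookup(word_embedding, x_t)

prob, att_l, att_r, att_t_l, att_t_r = lcr_rot(
    input_fw, input_bw, sen_len_fw, sen_len_bw, target, sen_len_tr,
    keep_prob1, keep_prob2)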
Example #15
    def create_model(self, inputs):
        inputs = tf.reshape(
            inputs,
            [-1, self.config.max_sentence_len, self.config.embedding_dim])
        inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob1)
        cell = tf.contrib.rnn.LSTMCell
        outputs_dim = 2 * self.config.n_hidden
        outputs_sen = bi_dynamic_rnn(cell, inputs, self.config.n_hidden,
                                     tf.reshape(self.sen_len, [-1]),
                                     self.config.max_sentence_len, 'sen',
                                     'last')
        outputs_sen = tf.reshape(outputs_sen,
                                 [-1, self.config.max_doc_len, outputs_dim])
        outputs_doc = bi_dynamic_rnn(cell, outputs_sen, self.config.n_hidden,
                                     self.doc_len, self.config.max_doc_len,
                                     'doc', 'last')
        doc_logits = softmax_layer(outputs_doc, outputs_dim,
                                   self.config.random_base, self.keep_prob2,
                                   self.config.l2_reg, self.config.n_class,
                                   'doc')
        return doc_logits
Example #16
    def add_softmax_layer(self, inputs):
        inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob2)
        return softmax_layer(inputs, self.filter_num * len(self.filter_list),
                             self.config.random_base, self.keep_prob2,
                             self.config.l2_reg, self.config.n_class)
Example #17
def main(_):
    word_id_mapping_o, w2v_o = load_w2v(FLAGS.embedding_file,
                                        FLAGS.embedding_dim, True)
    # word_id_mapping_o, w2v_o = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file, FLAGS.embedding_dim, True)
    word_embedding_o = tf.constant(w2v_o, dtype=tf.float32)
    # word_id_mapping_r, w2v_r = load_w2v(FLAGS.embedding_file_r, FLAGS.embedding_dim, True)
    # word_id_mapping_r, w2v_r = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file_r, FLAGS.embedding_dim, True)
    word_id_mapping_r = word_id_mapping_o
    word_embedding_r = tf.constant(w2v_o, dtype=tf.float32)

    with tf.name_scope('inputs'):
        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        x_o = tf.placeholder(tf.int32,
                             [None, FLAGS.max_doc_len, FLAGS.max_sentence_len])
        x_r = tf.placeholder(tf.int32,
                             [None, FLAGS.max_doc_len, FLAGS.max_sentence_len])
        sen_len_o = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len])
        sen_len_r = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len])
        doc_len_o = tf.placeholder(tf.int32, None)
        doc_len_r = tf.placeholder(tf.int32, None)
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])

    with tf.device('/gpu:0'):
        inputs_o = tf.nn.embedding_lookup(word_embedding_o, x_o)
        inputs_o = tf.reshape(
            inputs_o, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim])
        if FLAGS.method == 'ATT':
            h_o = hn_att(inputs_o, sen_len_o, doc_len_o, keep_prob1,
                         keep_prob2, 'o')
        else:
            h_o = hn(inputs_o, sen_len_o, doc_len_o, keep_prob1, keep_prob2,
                     'o')
        prob_o = softmax_layer(h_o, 2 * FLAGS.n_hidden, FLAGS.random_base,
                               keep_prob2, FLAGS.l2_reg, FLAGS.n_class, 'o')
    with tf.device('/gpu:1'):
        inputs_r = tf.nn.embedding_lookup(word_embedding_r, x_r)
        inputs_r = tf.reshape(
            inputs_r, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim])
        if FLAGS.method == 'ATT':
            h_r = hn_att(inputs_r, sen_len_r, doc_len_r, keep_prob1,
                         keep_prob2, 'r')
        else:
            h_r = hn(inputs_r, sen_len_r, doc_len_r, keep_prob1, keep_prob2,
                     'r')
        prob_r = softmax_layer(h_r, 2 * FLAGS.n_hidden, FLAGS.random_base,
                               keep_prob2, FLAGS.l2_reg, FLAGS.n_class, 'r')

    with tf.name_scope('loss'):
        r_y = tf.reverse(y, [1])  # reverse along the class axis
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # loss = - tf.reduce_mean(y * tf.log(prob_o)) - tf.reduce_mean(r_y * tf.log(prob_r)) + sum(reg_loss)
        # prob = FLAGS.alpha * prob_o + (1.0 - FLAGS.alpha) * tf.reverse(prob_r, [False, True])
        loss_o = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob_o, labels=y))
        loss_r = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob_r, labels=y))
        loss = loss_o + loss_r + tf.add_n(reg_loss)
        prob = FLAGS.alpha * prob_o + (1.0 - FLAGS.alpha) * tf.reverse(
            prob_r, [1])  # map the reversed-order class scores back
        all_vars = [var for var in tf.global_variables()]

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        grads, global_norm = tf.clip_by_global_norm(
            tf.gradients(loss, all_vars), 5.0)
        train_op = optimizer.apply_gradients(zip(grads, all_vars),
                                             name='train_op',
                                             global_step=global_step)

    with tf.name_scope('predict'):
        cor_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
        acc_prob = tf.reduce_mean(tf.cast(cor_pred, tf.float32))
        acc_num = tf.reduce_sum(tf.cast(cor_pred, tf.int32))

    true_y = tf.argmax(y, 1)
    pred_y = tf.argmax(prob, 1)

    title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
        FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
        FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    conf = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=conf) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
        validate_summary_writer = summary_func(loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/'
        saver = saver_func(save_dir)

        init = tf.global_variables_initializer()
        sess.run(init)

        # saver.restore(sess, '/-')

        tr_x, tr_y, tr_sen_len, tr_doc_len = load_inputs_document(
            FLAGS.train_file, word_id_mapping_o, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        te_x, te_y, te_sen_len, te_doc_len = load_inputs_document(
            FLAGS.test_file, word_id_mapping_o, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        tr_x_r, tr_y_r, tr_sen_len_r, tr_doc_len_r = load_inputs_document(
            FLAGS.train_file_r, word_id_mapping_r, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)
        te_x_r, te_y_r, te_sen_len_r, te_doc_len_r = load_inputs_document(
            FLAGS.test_file_r, word_id_mapping_r, FLAGS.max_sentence_len,
            FLAGS.max_doc_len)

        # v_x, v_y, v_sen_len, v_doc_len = load_inputs_document(
        #     FLAGS.validate_file,
        #     word_id_mapping,
        #     FLAGS.max_sentence_len,
        #     FLAGS.max_doc_len
        # )

        # v_x, v_y, v_sen_len, v_doc_len = load_inputs_document(
        #     FLAGS.validate_file,
        #     word_id_mapping,
        #     FLAGS.max_sentence_len,
        #     FLAGS.max_doc_len
        # )

        def get_batch_data(xo,
                           slo,
                           dlo,
                           xr,
                           slr,
                           dlr,
                           yy,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            for index in batch_index(len(yy), batch_size, 1, is_shuffle):
                feed_dict = {
                    x_o: xo[index],
                    x_r: xr[index],
                    y: yy[index],
                    sen_len_o: slo[index],
                    sen_len_r: slr[index],
                    doc_len_o: dlo[index],
                    doc_len_r: dlr[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc, max_prob, step = 0., None, None
        max_ty, max_py = None, None
        for i in range(FLAGS.n_iter):
            for train, _ in get_batch_data(tr_x, tr_sen_len, tr_doc_len,
                                           tr_x_r, tr_sen_len_r, tr_doc_len_r,
                                           tr_y, FLAGS.batch_size,
                                           FLAGS.keep_prob1, FLAGS.keep_prob2):
                _, step, summary = sess.run(
                    [train_op, global_step, train_summary_op], feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                # embed_update = tf.assign(word_embedding, tf.concat(0, [tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]))
                # sess.run(embed_update)

            acc, cost, cnt = 0., 0., 0
            p, ty, py = [], [], []
            for test, num in get_batch_data(te_x, te_sen_len, te_doc_len,
                                            te_x_r, te_sen_len_r, te_doc_len_r,
                                            te_y, 2000, 1.0, 1.0, False):
                _loss, _acc, _p, _ty, _py = sess.run(
                    [loss, acc_num, prob, true_y, pred_y], feed_dict=test)
                p += list(_p)
                ty += list(_ty)
                py += list(_py)
                acc += _acc
                cost += _loss * num
                cnt += num
            print('all samples={}, correct prediction={}'.format(cnt, acc))
            acc = acc / cnt
            cost = cost / cnt
            print('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                i, cost, acc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_prob = p
                max_ty = ty
                max_py = py
                # saver.save(sess, save_dir, global_step=step)

        print('P:', precision_score(max_ty, max_py, average=None))
        print('R:', recall_score(max_ty, max_py, average=None))
        print('F:', f1_score(max_ty, max_py, average=None))

        fp = open(FLAGS.prob_file, 'w')
        for item in max_prob:
            fp.write(' '.join([str(it) for it in item]) + '\n')
        fp.close()
        print('Optimization Finished! Max acc={}'.format(max_acc))

        print('Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
            FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
            FLAGS.n_hidden, FLAGS.l2_reg))
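get_batch_data above relies on a batch_index helper that is not shown. A minimal sketch, assuming it yields (optionally shuffled) NumPy index arrays of at most batch_size elements, so that xo[index] and len(index) behave as used in the feed-dict generator:

import numpy as np

def batch_index_sketch(length, batch_size, n_iter=1, is_shuffle=True):
    # Hypothetical stand-in: yield slices of a (shuffled) index array,
    # covering the data n_iter times.
    index = np.arange(length)
    for _ in range(n_iter):
        if is_shuffle:
            np.random.shuffle(index)
        for i in range((length + batch_size - 1) // batch_size):
            yield index[i * batch_size:(i + 1) * batch_size]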