Example #1
 def __init__(self, sequence_length, n_hidden_rnn, n_in_mlp, n_hidden_mlp, n_out,
         L1_reg, L2_reg, learning_rate, word_embedding, non_static):
     """
     question-answer rnn model init and definition.
     :param sequence_length: sequence length
     :param n_hidden_rnn: rnn hidden units
     :param n_in_mlp: mlp input size
     :param n_hidden_mlp: mlp hidden size
     :param n_out: mlp out size
     :param L1_reg: mlp L1 loss
     :param L2_reg: mlp L2 loss
     :param learning_rate: learning rate for update
     :param word_embedding: word embedding
     :param non_static: bool, update embedding or not
     """
     self.lr = learning_rate
     self.word_embedding = word_embedding
     # define the placeholder
     with tf.name_scope('placeholder'):
         self.q_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='query_input')
         self.a_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='answer_input')
         self.l_input = tf.placeholder(tf.int64, shape=[None], name='label_input')  # one-hot -> [batch_size, n_out]
         self.keep_prop = tf.placeholder(tf.float32, name='keep_prop')
     # transfer input to vec with embedding.
     with tf.name_scope("embedding"):
         _word_embedding = tf.get_variable(name='word_emb', shape=self.word_embedding.shape, dtype=tf.float32,
                                           initializer=tf.constant_initializer(self.word_embedding),
                                           trainable=non_static)
         q_embedding = tf.nn.embedding_lookup(_word_embedding, self.q_input)
         a_embedding = tf.nn.embedding_lookup(_word_embedding, self.a_input)
         print "input shape(embedding): ", q_embedding.get_shape()
     # define rnn model.
     with tf.variable_scope("RNN"):
         # rnn layer
         rnn_layer = RNNModule(n_hidden_rnn, cell="GRU")
         q_sentence_vec, a_sentence_vec = rnn_layer(q_embedding, a_embedding)
     # define classifier.
     with tf.name_scope("MLPDrop"):
         interact_layer = InteractLayer(n_hidden_rnn, n_hidden_rnn, dim=n_in_mlp)
         qa_vec = interact_layer(q_sentence_vec, a_sentence_vec)
         bn_layer = BatchNormLayer(n_in=n_in_mlp, inputs=qa_vec)
         classifier = MLP(bn_layer.out, n_in_mlp, n_hidden_mlp, n_out)
         # classifier = MLPDropout(bn_layer.out, n_in_mlp, n_hidden_mlp, n_out, keep_prop=self.keep_prop)
     # define cost, optimizer and output.
     self.pred_prob = classifier.pred_prob()
     self.error = classifier.errors(self.l_input)
     self.cost = classifier.cross_entropy(self.l_input) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
     self.optimizer = tf.train.RMSPropOptimizer(self.lr, 0.9).minimize(self.cost)
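A minimal usage sketch for the constructor above. It assumes TensorFlow 1.x plus the repository's helper layers (RNNModule, InteractLayer, BatchNormLayer, MLP); the enclosing class name (QARNNModel here) and all hyperparameter values are hypothetical, since the example shows only __init__:

import numpy as np
import tensorflow as tf

# Hypothetical setup: vocabulary of 1000 words, 50-dim embeddings,
# question/answer sequences padded to length 20.
embedding = np.random.uniform(-0.25, 0.25, (1000, 50)).astype('float32')
model = QARNNModel(sequence_length=20, n_hidden_rnn=100, n_in_mlp=50,  # QARNNModel is an assumed class name
                   n_hidden_mlp=100, n_out=2, L1_reg=0.0, L2_reg=1e-4,
                   learning_rate=0.001, word_embedding=embedding,
                   non_static=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One dummy batch of 8 question/answer pairs with integer class labels.
    q_batch = np.random.randint(0, 1000, size=(8, 20))
    a_batch = np.random.randint(0, 1000, size=(8, 20))
    labels = np.random.randint(0, 2, size=(8,))
    _, cost, err = sess.run(
        [model.optimizer, model.cost, model.error],
        feed_dict={model.q_input: q_batch,
                   model.a_input: a_batch,
                   model.l_input: labels,
                   model.keep_prop: 0.5})
    print("batch cost: %.4f, error rate: %.4f" % (cost, err))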
Example #2
 def __init__(self,
              word_embedding,
              img_h,
              img_w,
              filter_windows,
              feature_maps,
              n_in,
              n_hidden,
              n_out,
              L1_reg,
              L2_reg,
              learning_rate,
              non_static=False):
     """
     question-answer cnn model init and definition.
     :param word_embedding: word embedding
     :param img_h: max sentence length.
     :param img_w: embedding dim.
     :param filter_windows: filter height, e.g [1,2,3]
     :param feature_maps: filter_num.
     :param n_in: mlp input size.
     :param n_hidden: mlp hidden size.
     :param n_out: mlp out size.
     :param L1_reg: mlp L1 loss.
     :param L2_reg: mlp L2 loss.
     :param learning_rate: learning rate for update.
     :param non_static: bool, update embedding or not.
     """
     self.lr = learning_rate
     self.word_embedding = word_embedding
     self.num_feature_maps = feature_maps * len(filter_windows)
     # define the placeholder
     with tf.name_scope('placeholder'):
         self.q_input = tf.placeholder(tf.int64,
                                       shape=[None, img_h],
                                       name='query_input')
         self.a_input = tf.placeholder(tf.int64,
                                       shape=[None, img_h],
                                       name='answer_input')
         self.l_input = tf.placeholder(
             tf.int64, shape=[None],
             name='label_input')  # one-hot -> [batch_size, n_out]
         self.keep_prop = tf.placeholder(tf.float32,
                                         name="keep_prop")  # drop
     # transfer input to vec with embedding.
     with tf.name_scope("embedding"):
         _word_embedding = tf.get_variable(
             name='word_emb',
             shape=self.word_embedding.shape,
             dtype=tf.float32,
             initializer=tf.constant_initializer(self.word_embedding),
             trainable=non_static)
         q_embedding = tf.nn.embedding_lookup(_word_embedding, self.q_input)
         a_embedding = tf.nn.embedding_lookup(_word_embedding, self.a_input)
         q_embedding_expanded = tf.expand_dims(q_embedding, -1)
         a_embedding_expanded = tf.expand_dims(a_embedding, -1)
         print "input shape(embedding expanded): ", q_embedding_expanded.get_shape(
         )
     # define cnn model for qa.
     with tf.variable_scope("model_layers"):
         inception_module = InceptionModule(img_h, img_w, filter_windows,
                                            feature_maps)
         q_sentence_vec, a_sentence_vec = inception_module(
             q_embedding_expanded, a_embedding_expanded)
         interact_layer = InteractLayer(self.num_feature_maps,
                                        self.num_feature_maps,
                                        dim=n_in)
         qa_vec = interact_layer(q_sentence_vec, a_sentence_vec)
         bn_layer = BatchNormLayer(n_in=n_in, inputs=qa_vec)
     # define the classifier.
     with tf.name_scope("mlp"):
         classifier = MLP(bn_layer.out, n_in, n_hidden, n_out)
         # classifier = MLPDropout(bn_layer.out, n_in, n_hidden, n_out, keep_prop=self.keep_prop)
         # define cost, optimizer and output.
         self.pred_prob = classifier.pred_prob()
         self.error = classifier.errors(self.l_input)
         self.cost = classifier.cross_entropy(
             self.l_input
         ) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
         self.optimizer = tf.train.RMSPropOptimizer(self.lr,
                                                    0.9).minimize(self.cost)
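A similarly hedged inference sketch for the CNN variant (QACNNModel is an assumed class name; values are illustrative). Note that feature_maps * len(filter_windows) fixes the sentence-vector width fed into InteractLayer, e.g. 100 maps over 3 window sizes gives 300 features:

import numpy as np
import tensorflow as tf

embedding = np.random.uniform(-0.25, 0.25, (1000, 50)).astype('float32')
# img_h is the padded sentence length, img_w the embedding dimension.
model = QACNNModel(word_embedding=embedding, img_h=20, img_w=50,  # QACNNModel is an assumed class name
                   filter_windows=[1, 2, 3], feature_maps=100,
                   n_in=50, n_hidden=100, n_out=2,
                   L1_reg=0.0, L2_reg=1e-4, learning_rate=0.001)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    q_batch = np.random.randint(0, 1000, size=(4, 20))
    a_batch = np.random.randint(0, 1000, size=(4, 20))
    # keep_prop is fed for completeness; the dropout classifier is commented out.
    probs = sess.run(model.pred_prob,
                     feed_dict={model.q_input: q_batch,
                                model.a_input: a_batch,
                                model.keep_prop: 1.0})
    print(probs.shape)  # expected: (4, n_out) class probabilities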