def __init__(self, sequence_length, n_hidden_rnn, n_in_mlp, n_hidden_mlp, n_out,
             L1_reg, L2_reg, learning_rate, word_embedding, non_static):
    """
    Question-answer RNN model: init and graph definition.
    :param sequence_length: sequence length
    :param n_hidden_rnn: number of RNN hidden units
    :param n_in_mlp: MLP input size
    :param n_hidden_mlp: MLP hidden size
    :param n_out: MLP output size
    :param L1_reg: weight of the MLP L1 penalty
    :param L2_reg: weight of the MLP L2 penalty
    :param learning_rate: learning rate for the parameter update
    :param word_embedding: pre-trained word embedding matrix
    :param non_static: bool, whether to update the embedding during training
    """
    self.lr = learning_rate
    self.word_embedding = word_embedding

    # Define the placeholders.
    with tf.name_scope('placeholder'):
        self.q_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='query_input')
        self.a_input = tf.placeholder(tf.int64, shape=[None, sequence_length], name='answer_input')
        self.l_input = tf.placeholder(tf.int64, shape=[None], name='label_input')  # one-hot -> [batch_size, n_out]
        self.keep_prop = tf.placeholder(tf.float32, name='keep_prop')  # dropout keep probability

    # Transfer input ids to vectors with the embedding lookup.
    with tf.name_scope("embedding"):
        _word_embedding = tf.get_variable(name='word_emb',
                                          shape=self.word_embedding.shape,
                                          dtype=tf.float32,
                                          initializer=tf.constant_initializer(self.word_embedding),
                                          trainable=non_static)
        q_embedding = tf.nn.embedding_lookup(_word_embedding, self.q_input)
        a_embedding = tf.nn.embedding_lookup(_word_embedding, self.a_input)
        print("input shape(embedding): {}".format(q_embedding.get_shape()))

    # Define the RNN model.
    with tf.variable_scope("RNN"):
        # RNN layer shared by question and answer.
        rnn_layer = RNNModule(n_hidden_rnn, cell="GRU")
        q_sentence_vec, a_sentence_vec = rnn_layer(q_embedding, a_embedding)

    # Define the classifier.
    with tf.name_scope("MLPDrop"):
        interact_layer = InteractLayer(n_hidden_rnn, n_hidden_rnn, dim=n_in_mlp)
        qa_vec = interact_layer(q_sentence_vec, a_sentence_vec)
        bn_layer = BatchNormLayer(n_in=n_in_mlp, inputs=qa_vec)
        classifier = MLP(bn_layer.out, n_in_mlp, n_hidden_mlp, n_out)
        # classifier = MLPDropout(bn_layer.out, n_in_mlp, n_hidden_mlp, n_out, keep_prop=self.keep_prop)

    # Define cost, optimizer and outputs.
    self.pred_prob = classifier.pred_prob()
    self.error = classifier.errors(self.l_input)
    self.cost = classifier.cross_entropy(self.l_input) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    self.optimizer = tf.train.RMSPropOptimizer(self.lr, 0.9).minimize(self.cost)
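
# Usage sketch for the RNN model above, kept as a comment so this file stays
# importable. Assumptions (not shown in this snippet): the enclosing class is
# named `QARNNModel` here purely for illustration, `embedding_matrix` is a
# preloaded [vocab_size, emb_dim] numpy array, the hyper-parameter values are
# illustrative, and `q_batch`/`a_batch`/`label_batch` are numpy id matrices of
# shape [batch_size, sequence_length] (labels: [batch_size]).
#
#     model = QARNNModel(sequence_length=50, n_hidden_rnn=128, n_in_mlp=256,
#                        n_hidden_mlp=128, n_out=2, L1_reg=0.0, L2_reg=1e-4,
#                        learning_rate=0.001, word_embedding=embedding_matrix,
#                        non_static=True)
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
#         # One training step: run the optimizer and fetch the current cost.
#         _, cost = sess.run([model.optimizer, model.cost],
#                            feed_dict={model.q_input: q_batch,
#                                       model.a_input: a_batch,
#                                       model.l_input: label_batch,
#                                       model.keep_prop: 0.5})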
def __init__(self, word_embedding, img_h, img_w, filter_windows, feature_maps,
             n_in, n_hidden, n_out, L1_reg, L2_reg, learning_rate, non_static=False):
    """
    Question-answer CNN model: init and graph definition.
    :param word_embedding: pre-trained word embedding matrix
    :param img_h: max sentence length
    :param img_w: embedding dimension
    :param filter_windows: filter heights, e.g. [1, 2, 3]
    :param feature_maps: number of feature maps per filter size
    :param n_in: MLP input size
    :param n_hidden: MLP hidden size
    :param n_out: MLP output size
    :param L1_reg: weight of the MLP L1 penalty
    :param L2_reg: weight of the MLP L2 penalty
    :param learning_rate: learning rate for the parameter update
    :param non_static: bool, whether to update the embedding during training
    """
    self.lr = learning_rate
    self.word_embedding = word_embedding
    self.num_feature_maps = feature_maps * len(filter_windows)

    # Define the placeholders.
    with tf.name_scope('placeholder'):
        self.q_input = tf.placeholder(tf.int64, shape=[None, img_h], name='query_input')
        self.a_input = tf.placeholder(tf.int64, shape=[None, img_h], name='answer_input')
        self.l_input = tf.placeholder(tf.int64, shape=[None], name='label_input')  # one-hot -> [batch_size, n_out]
        self.keep_prop = tf.placeholder(tf.float32, name="keep_prop")  # dropout keep probability

    # Transfer input ids to vectors with the embedding lookup.
    with tf.name_scope("embedding"):
        _word_embedding = tf.get_variable(name='word_emb',
                                          shape=self.word_embedding.shape,
                                          dtype=tf.float32,
                                          initializer=tf.constant_initializer(self.word_embedding),
                                          trainable=non_static)
        q_embedding = tf.nn.embedding_lookup(_word_embedding, self.q_input)
        a_embedding = tf.nn.embedding_lookup(_word_embedding, self.a_input)
        # Add a channel dimension for the 2-D convolution: [batch, img_h, img_w, 1].
        q_embedding_expanded = tf.expand_dims(q_embedding, -1)
        a_embedding_expanded = tf.expand_dims(a_embedding, -1)
        print("input shape(embedding expanded): {}".format(q_embedding_expanded.get_shape()))

    # Define the CNN model for QA.
    with tf.variable_scope("model_layers"):
        inception_module = InceptionModule(img_h, img_w, filter_windows, feature_maps)
        q_sentence_vec, a_sentence_vec = inception_module(q_embedding_expanded, a_embedding_expanded)
        interact_layer = InteractLayer(self.num_feature_maps, self.num_feature_maps, dim=n_in)
        qa_vec = interact_layer(q_sentence_vec, a_sentence_vec)
        bn_layer = BatchNormLayer(n_in=n_in, inputs=qa_vec)

    # Define the classifier.
    with tf.name_scope("mlp"):
        classifier = MLP(bn_layer.out, n_in, n_hidden, n_out)
        # classifier = MLPDropout(bn_layer.out, n_in, n_hidden, n_out, keep_prop=self.keep_prop)

    # Define cost, optimizer and outputs.
    self.pred_prob = classifier.pred_prob()
    self.error = classifier.errors(self.l_input)
    self.cost = classifier.cross_entropy(self.l_input) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
    self.optimizer = tf.train.RMSPropOptimizer(self.lr, 0.9).minimize(self.cost)
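
# Usage sketch for the CNN model above, again as a comment so the file stays
# importable. Assumptions (not shown in this snippet): the enclosing class is
# named `QACNNModel` purely for illustration, `embedding_matrix` is a preloaded
# [vocab_size, img_w] numpy array, and the hyper-parameter values are
# illustrative. Note that img_w must equal the embedding dimension, since the
# convolution filters span the full word vector. At inference time the dropout
# keep probability is fed as 1.0:
#
#     model = QACNNModel(word_embedding=embedding_matrix, img_h=50, img_w=300,
#                        filter_windows=[1, 2, 3], feature_maps=100,
#                        n_in=256, n_hidden=128, n_out=2,
#                        L1_reg=0.0, L2_reg=1e-4, learning_rate=0.001,
#                        non_static=False)
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
#         # Fetch class probabilities for a batch of question-answer pairs.
#         probs = sess.run(model.pred_prob,
#                          feed_dict={model.q_input: q_batch,
#                                     model.a_input: a_batch,
#                                     model.keep_prop: 1.0})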