def forward_pass(self, shared_resources, embedded_question, embedded_support,
                 num_classes, tensors):
    """Score NLI classes with a conditional-encoding BiLSTM.

    The question plays the hypothesis role and the support the premise.
    Both are projected into a shared space (tied weights), encoded with a
    fused bidirectional LSTM where the question's final states initialize
    the support encoder, then max-pooled over time and classified.

    Returns unnormalized class logits of shape [batch, num_classes].
    """
    config = shared_resources.config
    repr_dim = config['repr_dim']
    dropout = config.get("dropout", 0.0)

    # Tie the projection weights for question and support via variable reuse.
    with tf.variable_scope('embedding_projection') as vs:
        embedded_question = tf.layers.dense(
            embedded_question, repr_dim, tf.tanh, name='projection')
        vs.reuse_variables()
        embedded_support = tf.layers.dense(
            embedded_support, repr_dim, tf.tanh, name='projection')

    # Noise shape [batch, 1, dim] keeps the dropout mask constant over time.
    noise_shape = [tf.shape(embedded_question)[0], 1,
                   tf.shape(embedded_question)[2]]
    embedded_question = tf.nn.dropout(embedded_question, 1.0 - dropout, noise_shape)
    embedded_support = tf.nn.dropout(embedded_support, 1.0 - dropout, noise_shape)

    cell = tf.contrib.rnn.LSTMBlockFusedCell(repr_dim)

    # Encode the question; its final states condition the support encoder.
    _, question_states = fused_birnn(
        cell, embedded_question, sequence_length=tensors.question_length,
        dtype=tf.float32, time_major=False, scope="question_rnn")
    support_outputs, _ = fused_birnn(
        cell, embedded_support, sequence_length=tensors.support_length,
        dtype=tf.float32, initial_state=question_states, time_major=False,
        scope="support_rnn")

    # Concatenate forward/backward outputs: [batch, T, 2 * repr_dim].
    support_outputs = tf.concat([support_outputs[0], support_outputs[1]], axis=2)

    # Zero out padded time steps so they cannot win the max-pool
    # (ReLU guarantees all activations are >= 0).
    time_mask = tf.expand_dims(
        tf.sequence_mask(tensors.support_length,
                         maxlen=tf.shape(support_outputs)[1],
                         dtype=tf.float32), 2)
    hidden = tf.layers.dense(
        support_outputs, repr_dim, tf.nn.relu, name="hidden") * time_mask
    pooled = tf.reduce_max(hidden, axis=1)

    # [batch, repr_dim] -> [batch, num_classes]
    return tf.layers.dense(pooled, num_classes, name="classification")
def nli_model(size, num_classes, emb_question, question_length, emb_support,
              support_length):
    """Conditional-encoding BiLSTM NLI scorer.

    Encodes the question with a fused bidirectional LSTM, feeds its final
    states into the support encoder, max-pools a masked ReLU projection of
    the support outputs over time, and maps the result to class logits.

    Returns a [batch, num_classes] logits tensor.
    """
    cell = tf.contrib.rnn.LSTMBlockFusedCell(size)

    # Question encoding: only the final states are kept as conditioning.
    _, question_states = fused_birnn(
        cell, emb_question, sequence_length=question_length,
        dtype=tf.float32, time_major=False, scope="question_rnn")
    support_outputs, _ = fused_birnn(
        cell, emb_support, sequence_length=support_length,
        dtype=tf.float32, initial_state=question_states, time_major=False,
        scope="support_rnn")

    # Forward + backward outputs side by side: [batch, T, 2 * size].
    support_outputs = tf.concat([support_outputs[0], support_outputs[1]], axis=2)

    # Mask padded time steps before pooling; with ReLU activations the
    # zeroed positions can never dominate the max.
    time_mask = tf.expand_dims(
        misc.mask_for_lengths(support_length,
                              max_length=tf.shape(support_outputs)[1],
                              mask_right=False, value=1.0), 2)
    hidden = tf.layers.dense(
        support_outputs, size, tf.nn.relu, name="hidden") * time_mask
    pooled = tf.reduce_max(hidden, axis=1)

    # [batch, size] -> [batch, num_classes]
    return tf.layers.dense(pooled, num_classes, name="classification")
def _bi_rnn(size, fused_rnn, sequence, seq_length, with_projection=False):
    """Run a fused bidirectional RNN over `sequence`.

    Returns the concatenated forward/backward outputs ([..., 2 * size]);
    when `with_projection` is set, they are linearly mapped back down to
    `size` dimensions.
    """
    directions = rnn.fused_birnn(
        fused_rnn, sequence, seq_length, dtype=tf.float32, scope='rnn')[0]
    output = tf.concat(directions, 2)
    if with_projection:
        # Stacked identities [eye; eye] initialize the projection as the
        # sum of the forward and backward outputs.
        identity_init = tf.constant_initializer(
            np.concatenate([np.eye(size), np.eye(size)]))
        output = tf.layers.dense(
            output, size, kernel_initializer=identity_init, name='projection')
    return output