def question_encoder(question, hparams, name="encoder"):
  """Question encoder: runs an LSTM and returns the last output as the encoding."""
  with tf.variable_scope(name, "encoder", values=[question]):
    question = common_layers.flatten4d3d(question)
    padding = common_attention.embedding_to_padding(question)
    length = common_attention.padding_to_length(padding)

    # Truncate or pad the question to exactly max_question_length steps.
    max_question_length = hparams.max_question_length
    question = question[:, :max_question_length, :]
    actual_question_length = common_layers.shape_list(question)[1]
    length = tf.minimum(length, max_question_length)
    padding = [[0, 0], [0, max_question_length - actual_question_length],
               [0, 0]]
    question = tf.pad(question, padding)
    question_shape = question.get_shape().as_list()
    question_shape[1] = max_question_length
    question.set_shape(question_shape)

    # Apply tanh, then dropout, to the question embedding.
    question = tf.tanh(question)
    question = tf.nn.dropout(question, keep_prob=1. - hparams.dropout)

    # static_rnn expects a list of [batch_size, emb_dim] tensors.
    question = [question[:, i, :] for i in range(max_question_length)]

    # rnn_layers = [_get_rnn_cell(hparams)
    #               for _ in range(hparams.num_rnn_layers)]
    # rnn_multi_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
    rnn_cell = _get_rnn_cell(hparams)
    # outputs, _ = tf.nn.dynamic_rnn(
    #     rnn_cell, question, length, dtype=tf.float32)
    _, state = tf.nn.static_rnn(
        rnn_cell, question, sequence_length=length, dtype=tf.float32)
    # outputs = [tf.expand_dims(output, axis=1) for output in outputs]
    # outputs = tf.concat(outputs, axis=1)
    # utils.collect_named_outputs("vqa_attention_debug", "question_output",
    #                             outputs)
    # utils.collect_named_outputs("vqa_attention_debug", "question_state",
    #                             state.h)
    # batch_size = common_layers.shape_list(outputs)[0]
    # row_indices = tf.range(batch_size)
    # # length - 1 as index
    # indices = tf.transpose([row_indices, tf.maximum(length - 1, 0)])
    # last_output = tf.gather_nd(outputs, indices)
    # utils.collect_named_outputs("vqa_attention_debug",
    #                             "question_final_output", last_output)
    return state.h
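# For illustration, a minimal self-contained sketch of the padding/length
# helpers used above. These are simplified reimplementations based on the
# convention that a timestep is padding iff its embedding vector is all
# zeros; the real versions live in tensor2tensor's common_attention and may
# differ in detail.
import tensorflow.compat.v1 as tf


def _embedding_to_padding_sketch(emb):
  # 1.0 where the embedding vector is entirely zero, else 0.0.
  return tf.to_float(tf.equal(tf.reduce_sum(tf.abs(emb), axis=-1), 0.0))


def _padding_to_length_sketch(padding):
  # Count of non-padding timesteps per sequence.
  return tf.to_int32(tf.reduce_sum(1.0 - padding, axis=-1))


# Example: one sequence of 3 steps whose last step is zero padding, so the
# computed length is 2.
_emb = tf.constant([[[0.5, -1.0], [2.0, 0.1], [0.0, 0.0]]])
_length = _padding_to_length_sketch(_embedding_to_padding_sketch(_emb))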
def question_encoder(question, hparams, name="encoder"):
  """Question encoder, run LSTM encoder and get the last output as encoding."""
  with tf.variable_scope(name, "encoder", values=[question]):
    question = common_layers.flatten4d3d(question)
    padding = common_attention.embedding_to_padding(question)
    length = common_attention.padding_to_length(padding)

    max_question_length = hparams.max_question_length
    question = question[:, :max_question_length, :]
    actual_question_length = common_layers.shape_list(question)[1]
    length = tf.minimum(length, max_question_length)
    padding = [[0, 0], [0, max_question_length - actual_question_length],
               [0, 0]]
    question = tf.pad(question, padding)
    question_shape = question.get_shape().as_list()
    question_shape[1] = max_question_length
    question.set_shape(question_shape)

    question = [question[:, i, :] for i in range(max_question_length)]

    # rnn_layers = [_get_rnn_cell(hparams)
    #               for _ in range(hparams.num_rnn_layers)]
    # rnn_multi_cell = tf.contrib.rnn.MultiRNNCell(rnn_layers)
    rnn_cell = _get_rnn_cell(hparams)
    # outputs, _ = tf.nn.dynamic_rnn(
    #     rnn_cell, question, length, dtype=tf.float32)
    outputs, _ = tf.nn.static_rnn(
        rnn_cell, question, sequence_length=length, dtype=tf.float32)
    outputs = [tf.expand_dims(output, axis=1) for output in outputs]
    outputs = tf.concat(outputs, axis=1)

    batch_size = common_layers.shape_list(outputs)[0]
    row_indices = tf.range(batch_size)
    # length - 1 as index
    indices = tf.transpose([row_indices, tf.maximum(length - 1, 0)])
    last_output = tf.gather_nd(outputs, indices)
    return last_output
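# A small standalone sketch of the gather_nd trick above: picking the output
# at the last valid timestep of each sequence. Tensor values here are made
# up for illustration.
import tensorflow.compat.v1 as tf

# outputs: [batch_size=2, time=4, hidden=3]; length: valid steps per example.
_outputs = tf.reshape(tf.range(24, dtype=tf.float32), [2, 4, 3])
_length = tf.constant([2, 4])

_row_indices = tf.range(tf.shape(_outputs)[0])
# Pair each row index with its last valid timestep (length - 1), floored at
# 0 so that all-padding sequences still index a valid position.
_indices = tf.transpose([_row_indices, tf.maximum(_length - 1, 0)])
# Selects _outputs[0, 1] and _outputs[1, 3].
_last_output = tf.gather_nd(_outputs, _indices)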
def compute_knowledge_selection_and_loss(self, features, encoder_output,
                                         fact_embedding, fact_lengths,
                                         margin, num_negative_samples):
  """Computes knowledge selection and the associated losses.

  Args:
    features: features.
    encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
    fact_embedding: <tf.float32>[batch_size*triple_num, max_triple_length,
      emb_dim]
    fact_lengths: <tf.int32>[batch_size*triple_num]
    margin: integer value for the max margin in the TransE loss.
    num_negative_samples: number of negative examples, obtained by shuffling,
      for the TransE loss.

  Returns:
    triple_logits: <tf.float32>[batch_size, triple_num] knowledge-selection
      logits.
    avg_triple_loss: scalar knowledge-selection loss.
    original_knowledge_encoder_output: <tf.float32>[batch_size, triple_num,
      emb_dim] averaged fact embeddings.
    transe_loss: scalar TransE loss.
  """
  hparams = self._hparams
  encoder_output_shape = common_layers.shape_list(encoder_output)
  encoder_hidden_dim = encoder_output_shape[-1]
  inputs = features["inputs"]
  # <tf.float32>[batch_size, input_length, emb_dim]
  inputs = tf.squeeze(inputs, 2)
  # <tf.float32>[batch_size, input_length]
  context_padding = common_attention.embedding_to_padding(inputs)
  # <tf.float32>[batch_size]
  context_lens = tf.to_float(
      common_attention.padding_to_length(context_padding))
  # <tf.float32>[batch_size, 1]
  context_lens = tf.expand_dims(context_lens, -1)
  # Compute the context vector summary.
  # <tf.float32>[batch_size, hidden_dim]
  context_vector_summary = compute_summary_embedding(
      encoder_output, context_lens, hparams)
  knowledge_encoder_output = compute_average_embedding(
      fact_embedding, fact_lengths)
  # <tf.float32>[batch_size, triple_num, emb_dim]
  knowledge_encoder_output = tf.reshape(
      knowledge_encoder_output, [-1, self.triple_num, encoder_hidden_dim])
  original_knowledge_encoder_output = knowledge_encoder_output

  if hparams.similarity_fuction == "dot_product":
    triple_logits = tf.squeeze(
        tf.matmul(knowledge_encoder_output,
                  tf.expand_dims(context_vector_summary, 2)), -1)
  elif hparams.similarity_fuction == "bilinear":
    # Tile the context vector summary.
    # <tf.float32>[batch_size, triple_num*hidden_dim]
    tiled_context_vector = tf.tile(context_vector_summary,
                                   [1, self.triple_num])
    # <tf.float32>[batch_size, triple_num, hidden_dim]
    context_vector = tf.reshape(
        tiled_context_vector, [-1, self.triple_num, encoder_hidden_dim])
    # Compute the outer product of context and fact vectors.
    context_vector = tf.expand_dims(context_vector, -1)
    knowledge_encoder_output = tf.expand_dims(knowledge_encoder_output, 2)
    # <tf.float32>[batch_size, triple_num, hidden_dim, hidden_dim]
    outer_product = tf.matmul(context_vector, knowledge_encoder_output)
    outer_product = tf.reshape(
        outer_product,
        [-1, self.triple_num, encoder_hidden_dim * encoder_hidden_dim])
    triple_logits = tf.squeeze(
        tf.layers.dense(outer_product, 1, name="knolwedge_final_mlp"), -1)

  avg_triple_loss = 0.0
  triple_labels = features["triple_labels"]

  # Reshape each mask to [batch_size*triple_num, max_triple_length].
  subject_mask = tf.reshape(
      features["subject_mask"],
      [-1, self.triple_num, hparams.max_triple_length])
  subject_mask = tf.reshape(subject_mask, [-1, hparams.max_triple_length])
  predicate_mask = tf.reshape(
      features["predicate_mask"],
      [-1, self.triple_num, hparams.max_triple_length])
  predicate_mask = tf.reshape(predicate_mask,
                              [-1, hparams.max_triple_length])
  object_mask = tf.reshape(
      features["object_mask"],
      [-1, self.triple_num, hparams.max_triple_length])
  object_mask = tf.reshape(object_mask, [-1, hparams.max_triple_length])

  # Token counts per span; each is <tf.float32>[batch_size*triple_num, 1].
  subject_length = tf.cast(
      tf.expand_dims(tf.reduce_sum(subject_mask, -1), 1), tf.float32)
  object_length = tf.cast(
      tf.expand_dims(tf.reduce_sum(object_mask, -1), 1), tf.float32)
  predicate_length = tf.cast(
      tf.expand_dims(tf.reduce_sum(predicate_mask, -1), 1), tf.float32)

  # Expand dimension 2 to be able to broadcast over emb_dim.
  subject_mask = tf.cast(tf.expand_dims(subject_mask, 2), tf.float32)
  predicate_mask = tf.cast(tf.expand_dims(predicate_mask, 2), tf.float32)
  object_mask = tf.cast(tf.expand_dims(object_mask, 2), tf.float32)

  # Masked averages over the triple tokens; each result is
  # <tf.float32>[batch_size*triple_num, emb_dim], with 1e-5 added to the
  # lengths to avoid division by zero.
  subject_vect = tf.reduce_sum(
      tf.multiply(fact_embedding, subject_mask), 1) / (
          subject_length +
          tf.broadcast_to(tf.constant([1e-5]), tf.shape(subject_length)))
  object_vect = tf.reduce_sum(
      tf.multiply(fact_embedding, object_mask), 1) / (
          object_length +
          tf.broadcast_to(tf.constant([1e-5]), tf.shape(object_length)))
  predicate_vect = tf.reduce_sum(
      tf.multiply(fact_embedding, predicate_mask), 1) / (
          predicate_length +
          tf.broadcast_to(tf.constant([1e-5]), tf.shape(predicate_length)))

  # Shuffle rows to generate negative (adversarial) samples.
  shuffled_subject_vect = []
  shuffled_object_vect = []
  for _ in range(num_negative_samples):
    # Each entry is <tf.float32>[batch_size*triple_num, emb_dim].
    shuffled_subject_vect += [
        tf.gather(subject_vect,
                  tf.random.shuffle(tf.range(tf.shape(subject_vect)[0])))
    ]
    shuffled_object_vect += [
        tf.gather(object_vect,
                  tf.random.shuffle(tf.range(tf.shape(object_vect)[0])))
    ]

  # KB pretraining loss: true triples should satisfy s + p ~ o, while
  # corrupted triples should not.
  positive_loss = tf.reduce_mean(
      tf.squared_difference(subject_vect + predicate_vect, object_vect))
  negative_loss = 0
  for n_adv in range(num_negative_samples):
    negative_loss += tf.reduce_mean(
        tf.squared_difference(shuffled_subject_vect[n_adv] + predicate_vect,
                              object_vect))
    negative_loss += tf.reduce_mean(
        tf.squared_difference(subject_vect + predicate_vect,
                              shuffled_object_vect[n_adv]))

  # TransE margin loss, clipped for numerical stability.
  negative_loss = negative_loss / (2 * num_negative_samples)
  transe_loss = tf.clip_by_value(
      margin + positive_loss - negative_loss,
      clip_value_min=0,
      clip_value_max=100)

  if hparams.mode != tf.estimator.ModeKeys.PREDICT:
    triple_losses = tf.nn.weighted_cross_entropy_with_logits(
        labels=triple_labels,
        logits=triple_logits,
        pos_weight=hparams.pos_weight)
    avg_triple_loss = tf.reduce_mean(triple_losses)
    tf.summary.scalar("triple_loss", avg_triple_loss)

  return (triple_logits, avg_triple_loss, original_knowledge_encoder_output,
          transe_loss)
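# The TransE objective implemented above, shown in isolation: for a true
# triple (s, p, o), the translation s + p should land near o, while
# corrupted triples (with shuffled subjects or objects) should land farther
# away. A minimal sketch assuming inputs of shape [N, emb_dim]; the function
# name is illustrative, not part of the codebase.
import tensorflow.compat.v1 as tf


def transe_margin_loss_sketch(subject_vect, predicate_vect, object_vect,
                              margin, num_negative_samples):
  positive_loss = tf.reduce_mean(
      tf.squared_difference(subject_vect + predicate_vect, object_vect))
  negative_loss = 0.0
  for _ in range(num_negative_samples):
    # Corrupt triples by independently shuffling subjects and objects.
    subj_perm = tf.random.shuffle(tf.range(tf.shape(subject_vect)[0]))
    obj_perm = tf.random.shuffle(tf.range(tf.shape(object_vect)[0]))
    negative_loss += tf.reduce_mean(
        tf.squared_difference(
            tf.gather(subject_vect, subj_perm) + predicate_vect,
            object_vect))
    negative_loss += tf.reduce_mean(
        tf.squared_difference(
            subject_vect + predicate_vect,
            tf.gather(object_vect, obj_perm)))
  negative_loss /= 2 * num_negative_samples
  # Hinge-style margin, clipped to [0, 100] for numerical stability.
  return tf.clip_by_value(margin + positive_loss - negative_loss, 0.0, 100.0)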
def compute_knowledge_selection_and_loss(self, features, encoder_output,
                                         fact_embedding, fact_lengths):
  """Computes knowledge selection and loss.

  Args:
    features: features.
    encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
    fact_embedding: <tf.float32>[batch_size*max_triple_num,
      max_triple_length, emb_dim]
    fact_lengths: <tf.int32>[batch_size*max_triple_num]

  Returns:
    triple_logits: <tf.float32>[batch_size, triple_num] knowledge-selection
      logits.
    avg_triple_loss: scalar knowledge-selection loss.
    original_knowledge_encoder_output: <tf.float32>[batch_size, triple_num,
      emb_dim] averaged fact embeddings.
  """
  hparams = self._hparams
  encoder_output_shape = common_layers.shape_list(encoder_output)
  encoder_hidden_dim = encoder_output_shape[-1]
  inputs = features["inputs"]
  # <tf.float32>[batch_size, input_length, emb_dim]
  inputs = tf.squeeze(inputs, 2)
  # <tf.float32>[batch_size, input_length]
  context_padding = common_attention.embedding_to_padding(inputs)
  # <tf.float32>[batch_size]
  context_lens = tf.to_float(
      common_attention.padding_to_length(context_padding))
  # <tf.float32>[batch_size, 1]
  context_lens = tf.expand_dims(context_lens, -1)
  # Compute the context vector summary.
  # <tf.float32>[batch_size, hidden_dim]
  context_vector_summary = compute_summary_embedding(
      encoder_output, context_lens, hparams)
  knowledge_encoder_output = compute_average_embedding(
      fact_embedding, fact_lengths)
  # <tf.float32>[batch_size, triple_num, emb_dim]
  knowledge_encoder_output = tf.reshape(
      knowledge_encoder_output, [-1, self.triple_num, encoder_hidden_dim])
  original_knowledge_encoder_output = knowledge_encoder_output

  if hparams.similarity_fuction == "dot_product":
    triple_logits = tf.squeeze(
        tf.matmul(knowledge_encoder_output,
                  tf.expand_dims(context_vector_summary, 2)), -1)
  elif hparams.similarity_fuction == "bilinear":
    # Tile the context vector summary.
    # <tf.float32>[batch_size, max_triple_num*hidden_dim]
    tiled_context_vector = tf.tile(context_vector_summary,
                                   [1, self.triple_num])
    # <tf.float32>[batch_size, max_triple_num, hidden_dim]
    context_vector = tf.reshape(
        tiled_context_vector, [-1, self.triple_num, encoder_hidden_dim])
    # Compute the outer product of context and fact vectors.
    context_vector = tf.expand_dims(context_vector, -1)
    knowledge_encoder_output = tf.expand_dims(knowledge_encoder_output, 2)
    # <tf.float32>[batch_size, max_triple_num, hidden_dim, hidden_dim]
    outer_product = tf.matmul(context_vector, knowledge_encoder_output)
    outer_product = tf.reshape(
        outer_product,
        [-1, self.triple_num, encoder_hidden_dim * encoder_hidden_dim])
    triple_logits = tf.squeeze(
        tf.layers.dense(outer_product, 1, name="knolwedge_final_mlp"), -1)

  avg_triple_loss = 0.0
  triple_labels = features["triple_labels"]
  triple_labels = triple_labels[:, :self.triple_num]

  if hparams.mode != tf.estimator.ModeKeys.PREDICT:
    triple_losses = tf.nn.weighted_cross_entropy_with_logits(
        labels=triple_labels,
        logits=triple_logits,
        pos_weight=hparams.pos_weight)
    avg_triple_loss = tf.reduce_mean(triple_losses)
    tf.summary.scalar("triple_loss", avg_triple_loss)

  return triple_logits, avg_triple_loss, original_knowledge_encoder_output
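# The two similarity functions selected by hparams.similarity_fuction, shown
# in isolation. A minimal sketch with made-up shapes and illustrative names;
# the bilinear variant scores the outer product of context and fact vectors
# with a learned linear layer, mirroring the function above.
import tensorflow.compat.v1 as tf


def dot_product_logits_sketch(context, facts):
  # context: [bs, d]; facts: [bs, tn, d] -> logits: [bs, tn].
  return tf.squeeze(tf.matmul(facts, tf.expand_dims(context, 2)), -1)


def bilinear_logits_sketch(context, facts):
  tn, d = facts.get_shape().as_list()[1:]
  # Tile the context vector so there is one copy per candidate fact.
  ctx = tf.reshape(tf.tile(context, [1, tn]), [-1, tn, d])  # [bs, tn, d]
  # Outer product per (example, fact): [bs, tn, d, d].
  outer = tf.matmul(tf.expand_dims(ctx, -1), tf.expand_dims(facts, 2))
  outer = tf.reshape(outer, [-1, tn, d * d])
  return tf.squeeze(tf.layers.dense(outer, 1), -1)  # [bs, tn]


_context = tf.random.normal([2, 4])   # two context summaries, hidden dim 4
_facts = tf.random.normal([2, 3, 4])  # three candidate facts per example
_logits = dot_product_logits_sketch(_context, _facts)  # [2, 3]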