def testFunctionalFC(self):
    """Functional fully_connected: variable count, output op name and shape."""
    layer_input = tf.random_uniform((5, 3), seed=1)
    layer_output = core_layers.fully_connected(
        layer_input, 2, activation=tf.nn.relu, name='fc')

    # The layer is expected to register exactly two trainable variables.
    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    self.assertEqual(len(trainable), 2)

    # The activation is the last op, so it names the output tensor.
    self.assertEqual(layer_output.op.name, 'fc/Relu')
    self.assertEqual(layer_output.get_shape().as_list(), [5, 2])
def rr_rnn(config, wx, wx_num, cx, cx_num, sx, sx_num, n_unit, is_training):
    """Encode a word sequence with char/syllable features and a stacked bi-GRU.

    Args:
        config: settings object; ``keep_prob`` and ``cell_stack_count`` are
            read here, and the object is forwarded to ``char_rnn_encoder``.
        wx: word-level input tensor. Its static size on axis 1 is used as
            the maximum word count, and it is concatenated with the encoder
            outputs on axis 2.
        wx_num: sequence lengths forwarded to ``bidirectional_rnn``.
        cx: character-level input forwarded to ``char_rnn_encoder``.
        cx_num: character lengths forwarded to ``char_rnn_encoder``.
        sx: syllable-level input forwarded to ``char_rnn_encoder``.
        sx_num: syllable lengths forwarded to ``char_rnn_encoder``.
        n_unit: width of the projection layer and of every GRU cell.
        is_training: forwarded to ``layers.dropout``.

    Returns:
        A pair ``(out[0], None)`` where ``out`` is the second value returned
        by ``bidirectional_rnn`` (presumably the per-layer final states --
        confirm against that helper).
    """
    keep_prob = config.keep_prob
    max_word_num = wx.get_shape()[1].value

    def stacked_gru():
        # One GRUCell object per layer. ``[GRUCell(n)] * k`` would repeat a
        # single object k times, which makes every layer share weights (or
        # raises) on TensorFlow releases where cells own their variables.
        return MultiRNNCell(
            [GRUCell(n_unit) for _ in range(config.cell_stack_count)])

    # Scope name is misspelled in the original; it is kept as-is because
    # renaming it would rename the variables created underneath.
    with tf.variable_scope('char_neocde'):
        cx_e = char_rnn_encoder(config, cx, cx_num, max_word_num)
    with tf.variable_scope('syll_encode'):
        sx_e = char_rnn_encoder(config, sx, sx_num, max_word_num)

    wx_e = tf.concat([wx, cx_e, sx_e], axis=2)
    wx_e = fully_connected(wx_e, n_unit)
    with tf.variable_scope('dropout'):
        wx_e = layers.dropout(wx_e, keep_prob=keep_prob,
                              is_training=is_training)

    # Separate stacks for the two directions, for the same reason as above.
    cell_fw = stacked_gru()
    cell_bw = stacked_gru()
    _, out = bidirectional_rnn(cell_fw, cell_bw, wx_e, wx_num)
    return out[0], None
def testFunctionalFCInScope(self):
    """Weight-variable naming of fully_connected under variable scopes."""
    # Case 1: explicit string name nested inside an enclosing scope.
    with tf.variable_scope('test'):
        layer_input = tf.random_uniform((5, 3), seed=1)
        core_layers.fully_connected(layer_input, 2, name='fc')
        weights = tf.trainable_variables()[0]
        self.assertEqual(weights.name, 'test/fc/weights:0')

    # Case 2: the scope object itself is passed as the layer name.
    with tf.variable_scope('test1') as scope:
        layer_input = tf.random_uniform((5, 3), seed=1)
        core_layers.fully_connected(layer_input, 2, name=scope)
        weights = tf.trainable_variables()[2]
        self.assertEqual(weights.name, 'test1/weights:0')

    # Case 3: no name given, so the default layer name is used.
    with tf.variable_scope('test2'):
        layer_input = tf.random_uniform((5, 3), seed=1)
        core_layers.fully_connected(layer_input, 2)
        weights = tf.trainable_variables()[4]
        self.assertEqual(weights.name, 'test2/fully_connected/weights:0')
def merge_weight_predict(is_train, context_rep, question_rep, context_mask,
                         merger, post_merger, max_pool, predictor, answer,
                         multiply_probs=None):
    """Merge context with question, score each sentence, pool and predict.

    Args:
        is_train: forwarded to every ``apply`` call.
        context_rep: passed to ``merger.apply`` as ``tensor``.
        question_rep: passed to ``merger.apply`` as ``fixed_tensor``.
        context_mask: mask forwarded to the merger, post-merger and pooling.
        merger: object whose ``apply`` combines context and question.
        post_merger: optional extra layer applied to the merged
            representation; skipped when ``None``.
        max_pool: object whose ``apply`` pools the per-sentence logits.
        predictor: object whose ``apply`` turns pooled logits into the
            prediction.
        answer: forwarded to ``predictor.apply``.
        multiply_probs: optional probabilities ``p``. When given, the pooled
            logit ``x`` is replaced by
            ``log(p) - log(1 + exp(-x) - p)``, which is the logit of
            ``p * sigmoid(x)`` (``EPSILON`` is added inside both logs).

    Returns:
        Tuple ``(merged representation, per-sentence logits, prediction)``.
    """
    # NOTE(review): statement nesting under each variable_scope was
    # reconstructed from a whitespace-collapsed listing -- confirm.
    with tf.variable_scope("merger"):
        merged = merger.apply(is_train, tensor=context_rep,
                              fixed_tensor=question_rep, mask=context_mask)

    if post_merger is not None:
        with tf.variable_scope("post_merger"):
            merged = post_merger.apply(is_train, merged, mask=context_mask)

    with tf.variable_scope("sentence_level_predictions"):
        glorot = get_keras_initialization('glorot_uniform')
        sentences_logits = fully_connected(
            merged, 1, use_bias=True, activation=None,
            kernel_initializer=glorot)
        pooled_logits = max_pool.apply(is_train, sentences_logits,
                                       context_mask)
        if multiply_probs is not None:
            log_numerator = tf.log(multiply_probs + EPSILON)
            log_denominator = tf.log(
                1. + tf.exp(-pooled_logits) - multiply_probs + EPSILON)
            pooled_logits = log_numerator - log_denominator

    with tf.variable_scope("predictor"):
        prediction = predictor.apply(is_train, pooled_logits, answer)

    return merged, sentences_logits, prediction
def apply(self, is_train, tensor1, tensor2):
    """Merge two tensors, weight the merged encodings by attention, and pool.

    The two inputs are combined by ``self.merge``. A softmax weight is then
    computed per position along axis 1 of the "keys" (the merged tensor when
    ``self.weight_context`` is truthy, otherwise ``tensor1``) and used to
    scale the merged tensor. The result is either summed over axis 1
    (``encode == 'sum'``) or flattened (``encode == 'concat'``).

    Args:
        is_train: forwarded to ``self.merge.apply``.
        tensor1: first input; also the key tensor when ``weight_context``
            is falsy. Indexed as rank 3 by the ``'btd,td->bt'`` einsum.
        tensor2: second input to the merge.

    Returns:
        The pooled weighted representation.

    Raises:
        NotImplementedError: if ``self.weight_mode`` is not
            ``'per_encoding'``/``'fully_connected'`` or ``self.encode`` is
            not ``'sum'``/``'concat'``.
    """
    # Fail fast, before any op or variable is created. Previously an unknown
    # ``encode`` value fell through and the method silently returned None.
    if self.weight_mode not in ('per_encoding', 'fully_connected'):
        raise NotImplementedError(
            "unsupported weight_mode: %r" % (self.weight_mode,))
    if self.encode not in ('sum', 'concat'):
        raise NotImplementedError(
            "unsupported encode: %r" % (self.encode,))

    init = get_keras_initialization(self.init)
    with tf.variable_scope('merge'):
        merged = self.merge.apply(is_train, tensor1, tensor2)

    keys = merged if self.weight_context else tensor1
    keys_shape = keys.shape.as_list()

    # NOTE(review): whether the softmax/weighting ops live inside the
    # 'weighting' scope was reconstructed from a collapsed listing; only op
    # names depend on it, variable names are unaffected.
    with tf.variable_scope('weighting'):
        if self.weight_mode == 'per_encoding':
            # One weight vector and one bias per position on axis 1.
            weights = tf.get_variable(
                'weights', shape=[keys_shape[1], keys_shape[2]],
                initializer=init)
            biases = tf.get_variable(
                'biases', shape=[keys_shape[1]],
                initializer=tf.zeros_initializer())
            unnormalized_alphas = (
                tf.einsum('btd,td->bt', keys, weights) + biases)
        else:  # 'fully_connected'
            flattened = tf.layers.flatten(keys)
            unnormalized_alphas = fully_connected(
                flattened, units=keys_shape[1], kernel_initializer=init)

        normalized_alphas = tf.nn.softmax(unnormalized_alphas, axis=-1)
        weighted_rep = tf.expand_dims(normalized_alphas, axis=-1) * merged

    if self.encode == 'sum':
        return tf.reduce_sum(weighted_rep, axis=1)
    return tf.layers.flatten(weighted_rep)
def rr_swide(config, wx, wx_num, cx, cx_num, sx, sx_num, n_unit, is_training):
    """Encode words with char/syllable features and a shallow-wide CNN.

    Args:
        config: settings object; ``keep_prob`` and ``rnn_dim`` are read
            here, and the object is forwarded to ``char_rnn_encoder``.
        wx: word-level input tensor, concatenated with the encoder outputs
            on axis 2; its axis-1 dimension is the maximum word count.
        wx_num: not used by this function.
        cx: character-level input forwarded to ``char_rnn_encoder``.
        cx_num: character lengths forwarded to ``char_rnn_encoder``.
        sx: syllable-level input forwarded to ``char_rnn_encoder``.
        sx_num: syllable lengths forwarded to ``char_rnn_encoder``.
        n_unit: not used by this function.
        is_training: forwarded to ``layers.dropout``.

    Returns:
        A pair ``(cnn_output, None)``.
    """
    # NOTE(review): this passes the Dimension object itself (no ``.value``)
    # to char_rnn_encoder -- confirm the helper accepts that.
    word_count = wx.get_shape()[1]

    with tf.variable_scope('char_encode'):
        char_features = char_rnn_encoder(config, cx, cx_num, word_count)
    with tf.variable_scope('syll_encode'):
        syll_features = char_rnn_encoder(config, sx, sx_num, word_count)

    combined = tf.concat([wx, char_features, syll_features], axis=2)
    projected = fully_connected(combined, config.rnn_dim)

    with tf.variable_scope('dropout'):
        projected = layers.dropout(projected,
                                   keep_prob=config.keep_prob,
                                   is_training=is_training)

    with tf.variable_scope('shallow_cnn'):
        cnn_output = shallow_wide_cnn(projected, (3, 4, 5), 100)

    return cnn_output, None
def apply(self, is_train, x, mask=None):
    """Scale ``x`` elementwise by a learned sigmoid gate of the same width.

    Args:
        is_train: unused here.
        x: input tensor; its last static dimension sets the gate width.
        mask: unused here.

    Returns:
        ``gate * x`` where ``gate`` is a sigmoid-activated
        ``fully_connected`` projection of ``x``.
    """
    # A falsy ``self.bias`` (None or 0) means no explicit bias initializer
    # is supplied to the layer.
    if self.bias:
        gate_bias_init = tf.constant_initializer(self.bias)
    else:
        gate_bias_init = None

    gate_width = x.shape.as_list()[-1]
    gate = fully_connected(
        x, gate_width,
        activation=tf.nn.sigmoid,
        bias_initializer=gate_bias_init,
        kernel_initializer=get_keras_initialization(self.init),
        name="compute-gate")
    return gate * x
def apply(self, is_train, x, mask=None):
    """Project ``x`` to a tensor with the same last-dimension width.

    Args:
        is_train: unused here.
        x: input tensor; its last static dimension is the output width.
        mask: unused here.

    Returns:
        Output of ``fully_connected`` configured from ``self.bias``,
        ``self.activation`` and ``self.w_init``.
    """
    width = x.shape.as_list()[-1]
    activation_fn = activations.get(self.activation)
    kernel_init = _wrap_init(initializers.get(self.w_init))
    return fully_connected(x, width,
                           use_bias=self.bias,
                           activation=activation_fn,
                           kernel_initializer=kernel_init)
def apply(self, is_train, x, mask=None):
    """Project ``x`` to ``self.n_out`` units.

    Args:
        is_train: unused here.
        x: input tensor.
        mask: unused here.

    Returns:
        Output of ``fully_connected`` configured from ``self.n_out``,
        ``self.bias``, ``self.activation`` and ``self.w_init``.
    """
    # Backwards compatibility: an unset (None) bias flag means "use a bias".
    use_bias = True if self.bias is None else self.bias
    activation_fn = get_keras_activation(self.activation)
    kernel_init = _wrap_init(initializers.get(self.w_init))
    return fully_connected(x, self.n_out,
                           use_bias=use_bias,
                           activation=activation_fn,
                           kernel_initializer=kernel_init)
def _get_predictions_for(self, is_train, question_embed, question_mask,
                         context_embed, context_mask, answer, question_lm,
                         context_lm, sentence_segments, sentence_mask):
    """Build the two-iteration sentence-relevance prediction graph.

    The context inputs carry two contexts stacked on axis 1. The first
    context is scored against the encoded question; the question is then
    reformulated by attending between the question and the first context,
    and the second context is scored against the reformulated question.

    Args:
        is_train: forwarded to every layer's ``apply``.
        question_embed: question embeddings.
        question_mask: mask for the question.
        context_embed: context embeddings; unstacked into two on axis 1.
        context_mask: context masks; unstacked into two on axis 1.
        answer: indexable; ``answer[0]`` / ``answer[1]`` supervise the
            first / second iteration.
        question_lm: language-model input for the question; only used when
            ``self.use_elmo`` is truthy.
        context_lm: language-model input for the contexts; unstacked into
            two on axis 1, only used when ``self.use_elmo`` is truthy.
        sentence_segments: per-context sentence segments; unstacked into
            two on axis 1.
        sentence_mask: per-context sentence masks; unstacked into two on
            axis 1.

    Returns:
        ``MultipleBinaryPredictions`` wrapping the first- and
        second-iteration predictions, in that order.
    """
    # NOTE(review): statement nesting under each variable_scope was
    # reconstructed from a whitespace-collapsed listing -- confirm against
    # the canonical file before relying on op/variable names.
    question_rep, context_rep = question_embed, context_embed

    # Split every context-side input into its two contexts.
    context1_rep, context2_rep = tf.unstack(context_rep, axis=1, num=2)
    context1_mask, context2_mask = tf.unstack(context_mask, axis=1, num=2)
    context1_sentence_segments, context2_sentence_segments = tf.unstack(
        sentence_segments, axis=1, num=2)
    context1_sentence_mask, context2_sentence_mask = tf.unstack(
        sentence_mask, axis=1, num=2)

    # Optional language-model inputs are passed as extra positional args;
    # empty lists mean "no extra argument".
    q_lm_in, c1_lm_in, c2_lm_in = [], [], []
    if self.use_elmo:
        context1_lm, context2_lm = tf.unstack(context_lm, axis=1, num=2)
        q_lm_in = [question_lm]
        c1_lm_in = [context1_lm]
        c2_lm_in = [context2_lm]

    if self.embed_mapper is not None:
        # The first call creates the mapper variables; the second context
        # and the question reuse them.
        with tf.variable_scope("map_embed"):
            context1_rep = self.embed_mapper.apply(is_train, context1_rep, context1_mask, *c1_lm_in)
        with tf.variable_scope("map_embed", reuse=True):
            context2_rep = self.embed_mapper.apply(is_train, context2_rep, context2_mask, *c2_lm_in)
            question_rep = self.embed_mapper.apply(is_train, question_rep, question_mask, *q_lm_in)

    with tf.variable_scope("seq_enc"):
        question_enc = self.sequence_encoder.apply(is_train, question_rep, question_mask)
        # Named and registered so the encoding can be fetched later.
        question_enc = tf.identity(question_enc, name='encode_question')
        tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, question_enc)

    def encode_sentences(context, sentence_segs, sentence_mask, rep_name):
        # Encode sentences of one context, optionally map them, then expose
        # the result under `rep_name` in the intermediate-layer collection.
        context = self.sentences_encoder.apply(context, sentence_segs, sentence_mask)
        if self.sentence_mapper is not None:
            with tf.variable_scope('sentence_mapper'):
                context = self.sentence_mapper.apply(is_train, context, mask=sentence_mask)
        context = tf.identity(context, name=rep_name)
        tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, context)
        return context

    # Both contexts share the sentence-encoder variables.
    with tf.variable_scope('sentences_enc'):
        context1_sent_rep = encode_sentences(context1_rep, context1_sentence_segments,
                                             context1_sentence_mask, 'encode_context1')
    with tf.variable_scope('sentences_enc', reuse=True):
        context2_sent_rep = encode_sentences(context2_rep, context2_sentence_segments,
                                             context2_sentence_mask, 'encode_context2')

    # First Iteration (same as in the single context model)
    with tf.variable_scope("context1_relevance"):
        # context1_sentences_logits is not used further in this method.
        c1_q_merged_rep, context1_sentences_logits, context1_pred = \
            merge_weight_predict(is_train=is_train,
                                 context_rep=context1_sent_rep,
                                 question_rep=question_enc,
                                 context_mask=context1_sentence_mask,
                                 merger=self.merger,
                                 post_merger=self.post_merger,
                                 max_pool=self.max_pool,
                                 predictor=self.predictor,
                                 answer=[answer[0]])

    # Question Reformulation
    with tf.variable_scope("reformulation"):
        with tf.variable_scope('c2q'):
            # Question attends over the first context, then is re-encoded.
            question_rep = self.context_to_question_attention.apply(
                is_train, x=question_rep, keys=context1_rep, memories=context1_rep,
                x_mask=question_mask, memory_mask=context1_mask)
            reread_q_enc = self.sequence_encoder.apply(
                is_train, question_rep, question_mask)
        with tf.variable_scope('q2c'):
            # First context attends over the (already updated) question,
            # then is re-encoded.
            context1_rep = self.question_to_context_attention.apply(
                is_train, x=context1_rep, keys=question_rep, memories=question_rep,
                x_mask=context1_mask, memory_mask=question_mask)
            reread_c1_enc = self.sequence_encoder.apply(
                is_train, context1_rep, context1_mask)
        with tf.variable_scope('reread_merge'):
            reformulated_q = self.reread_merger.apply(
                is_train, reread_q_enc, reread_c1_enc)
            # Project to the width of the first-iteration merged
            # representation.
            reformulated_q = fully_connected(
                reformulated_q, c1_q_merged_rep.shape.as_list()[-1],
                use_bias=True,
                activation=get_keras_activation('relu'),
                kernel_initializer=get_keras_initialization(
                    'glorot_uniform'))
        reformulated_q = tf.identity(reformulated_q, name='reformulated_question')
        tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, reformulated_q)

    # Second Iteration
    with tf.variable_scope("context2_relevance"):
        # Optionally condition the second prediction on the first
        # iteration's probabilities.
        first_iter_probs = None
        if self.multiply_iteration_probs:
            first_iter_probs = tf.expand_dims(context1_pred.get_probs(), axis=1)
        # c2_q_merged_rep and context2_sentences_logits are not used
        # further in this method.
        c2_q_merged_rep, context2_sentences_logits, context2_pred = \
            merge_weight_predict(is_train=is_train,
                                 context_rep=context2_sent_rep,
                                 question_rep=reformulated_q,
                                 context_mask=context2_sentence_mask,
                                 merger=self.merger,
                                 post_merger=self.post_merger,
                                 max_pool=self.max_pool,
                                 predictor=self.predictor,
                                 answer=[answer[1]],
                                 multiply_probs=first_iter_probs)

    return MultipleBinaryPredictions([context1_pred, context2_pred])
(character_size, char_dim)) char_embed = tf.nn.embedding_lookup(char_embedder, chars) syll_embedder = tf.get_variable('syll_embedder', (syll_size, syll_dim)) syll_embed = tf.nn.embedding_lookup(syll_embedder, sylls) from core_layer import han1_syll_cnn_char_rnn, han1_syll_cnn_char_cnn core_layer_output = han1_syll_cnn_char_cnn(config, word_embed, sent_len, char_embed, word_len, syll_embed, None, fc_dim, is_training) with tf.variable_scope("output"): output = fully_connected( core_layer_output, fc_dim, use_bias=True, activation=activations.get("relu"), kernel_initializer=initializers.get("glorot_uniform")) output = layers.dropout(output, keep_prob=config.keep_prob, is_training=is_training) output = fully_connected( output, 1, use_bias=True, activation=None, kernel_initializer=initializers.get("glorot_uniform")) y_logits = tf.sigmoid(output) * 9 + 1 predictions = y_logits acc = tf.reduce_mean(
def _get_predictions_for(self, is_train, question_embed, question_mask,
                         context_embed, context_mask, answer, question_lm,
                         context_lm, sentence_segments, sentence_mask):
    """Build the single-context graph with yes/no and supporting-fact heads.

    Besides the attended context representation handed to the predictor,
    three sets of logits are produced: a 2-way "yes/no choice" head on the
    question, a 2-way "yes/no answer" head on the attended context, and one
    logit per sentence for supporting-fact prediction.

    Args:
        is_train: forwarded to every layer's ``apply``.
        question_embed: question embeddings.
        question_mask: mask for the question.
        context_embed: context embeddings; axis 1 must have size 1 and is
            removed.
        context_mask: context mask; axis 1 must have size 1 and is removed.
        answer: forwarded to ``self.predictor.apply``.
        question_lm: language-model input for the question; only used when
            ``self.use_elmo`` is truthy.
        context_lm: language-model input for the context; axis 1 is
            removed, only used when ``self.use_elmo`` is truthy.
        sentence_segments: sentence segments; axis 1 is removed.
        sentence_mask: sentence mask; axis 1 is removed.

    Returns:
        Whatever ``self.predictor.apply`` returns.
    """
    # NOTE(review): statement nesting under each variable_scope was
    # reconstructed from a whitespace-collapsed listing -- confirm against
    # the canonical file before relying on op/variable names.
    question_rep, context_rep = question_embed, context_embed

    # Drop the singleton "context" axis; the one-element unpacking makes
    # the graph construction fail unless exactly one context is present.
    context_rep, = tf.unstack(context_rep, axis=1, num=1)
    context_mask, = tf.unstack(context_mask, axis=1, num=1)
    context_sentence_segments, = tf.unstack(sentence_segments, axis=1, num=1)
    context_sentence_mask, = tf.unstack(sentence_mask, axis=1, num=1)

    # Optional language-model inputs are passed as extra positional args;
    # empty lists mean "no extra argument".
    q_lm_in, c_lm_in = [], []
    if self.use_elmo:
        context_lm, = tf.unstack(context_lm, axis=1, num=1)
        q_lm_in = [question_lm]
        c_lm_in = [context_lm]

    if self.embed_mapper is not None:
        # Context creates the mapper variables; the question reuses them.
        with tf.variable_scope("map_embed"):
            context_rep = self.embed_mapper.apply(is_train, context_rep, context_mask, *c_lm_in)
        with tf.variable_scope("map_embed", reuse=True):
            question_rep = self.embed_mapper.apply(is_train, question_rep, question_mask, *q_lm_in)

    # Yes/no "choice" head: computed from the question alone, before the
    # optional question mapper is applied.
    with tf.variable_scope('yes_no_question_prediction'):
        yes_no_q_enc = self.yes_no_question_encoder.apply(
            is_train, question_rep, question_mask)
        yes_no_choice_logits = fully_connected(
            yes_no_q_enc, 2, use_bias=True, activation=None,
            kernel_initializer=get_keras_initialization('glorot_uniform'),
            name='yes_no_choice')

    if self.question_mapper is not None:
        with tf.variable_scope("map_question"):
            question_rep = self.question_mapper.apply(
                is_train, question_rep, question_mask)

    if self.context_mapper is not None:
        with tf.variable_scope("map_context"):
            context_rep = self.context_mapper.apply(
                is_train, context_rep, context_mask)

    # Scope name is spelled "buid_memories" in the code; it is a runtime
    # string and therefore part of the variable names.
    with tf.variable_scope("buid_memories"):
        keys, memories = self.memory_builder.apply(is_train, question_rep, question_mask)

    with tf.variable_scope("apply_attention"):
        context_rep = self.attention.apply(is_train, context_rep, keys, memories, context_mask, question_mask)

    if self.match_encoder is not None:
        with tf.variable_scope("process_attention"):
            context_rep = self.match_encoder.apply(is_train, context_rep, context_mask)

    # Yes/no "answer" head: computed from the question-aware context.
    with tf.variable_scope('yes_no_answer_prediction'):
        yes_no_c_enc = self.yes_no_context_encoder.apply(
            is_train, context_rep, context_mask)
        yes_no_answer_logits = fully_connected(
            yes_no_c_enc, 2, use_bias=True, activation=None,
            kernel_initializer=get_keras_initialization('glorot_uniform'),
            name='yes_no_answer')

    # Supporting-fact head: one logit per sentence.
    with tf.variable_scope('supporting_fact_prediction'):
        # Work on a separate name so `context_rep` itself reaches the
        # predictor unchanged by the optional pre-mapper.
        pre_context_sents = context_rep
        if self.pre_sp_mapper is not None:
            with tf.variable_scope('pre_sp_mapper'):
                pre_context_sents = self.pre_sp_mapper.apply(
                    is_train, pre_context_sents, context_mask)
        context_sents = self.sentences_encoder.apply(
            pre_context_sents, context_sentence_segments, context_sentence_mask)
        # Named pass-through so the sentence encodings can be located by
        # name in the graph.
        context_sents = tf.identity(context_sents, name='debug')
        if self.sentence_mapper is not None:
            with tf.variable_scope('sentence_mapper'):
                context_sents = self.sentence_mapper.apply(
                    is_train, context_sents, mask=context_sentence_mask)
        sentences_logits = fully_connected(
            context_sents, 1, use_bias=True, activation=None,
            kernel_initializer=get_keras_initialization('glorot_uniform'),
            name='supporting_fact_fc')

    with tf.variable_scope("predict"):
        # The trailing size-1 axis of the sentence logits is squeezed away
        # before they are handed to the predictor.
        return self.predictor.apply(
            is_train, context_rep, answer, context_mask,
            yes_no_choice_logits=yes_no_choice_logits,
            yes_no_answer_logits=yes_no_answer_logits,
            sentence_logits=tf.squeeze(sentences_logits, axis=[2]),
            sentence_mask=context_sentence_mask)
# Integer-id placeholders for the character- and syllable-level inputs and
# for the integer labels.
cx_ = tf.placeholder(tf.int32, (None, max_word_num, max_char_num), name='cx_')
sx_ = tf.placeholder(tf.int32, (None, max_word_num, max_syll_num), name='sx_')
# NOTE(review): ``(None)`` is just ``None`` (unknown shape), not a rank-1
# shape ``(None,)`` -- left unchanged; confirm which was intended.
y_ = tf.placeholder(tf.int32, (None), name='y_')

# Trainable embedding tables and their lookups.
c_embed = tf.get_variable('c_embed', (character_size, char_dim))
s_embed = tf.get_variable('s_embed', (syllable_size, syll_dim))
cx = tf.nn.embedding_lookup(c_embed, cx_)
sx = tf.nn.embedding_lookup(s_embed, sx_)

core_output = cnn_char_syll(config, wx, cx, sx, is_training)

# Ten output scores, one per class.
preds = fully_connected(
    core_output, 10,
    activation=activations.get('relu'),
    kernel_initializer=initializers.get('glorot_uniform'))

# Class index i (0-based) corresponds to label i + 1, so predictions are
# directly comparable with ``y_``.
pred = tf.argmax(preds, axis=1, output_type=tf.int32) + 1

# Build the one-hot target with the same index <-> label mapping as ``pred``.
# Using ``tf.one_hot(y_, 10)`` here trained the network one class off and
# turned the top label (10) into an all-zero target row.
y_arr = tf.one_hot(y_ - 1, 10)

acc = tf.reduce_mean(tf.to_float(tf.equal(pred, y_)))
loss = tf.losses.mean_squared_error(y_arr, preds)
# Error between the integer labels and integer predictions; not part of
# the optimized objective (only ``loss`` is minimized).
mse = tf.losses.mean_squared_error(y_, pred)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

##############################################################################################################
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
def rr_han(config, word_embed, sent_len, char_embed, word_len, syll_embed,
           syll_len, n_unit, is_training):
    """One-level HAN encoder with character- and syllable-level RNN features.

    Input spec (as documented by the original author):
        word_embed  [batch_size, max_sent_len, word_dim]
        sent_len    [batch_size]
        char_embed  [batch_size, max_sent_len, max_word_len, char_dim]
        word_len    [batch_size, max_sent_len]
        syll_embed  [batch_size, max_sent_len, max_syll_len, syll_dim]
        syll_len    [batch_size, max_sent_len]

    Args:
        config: settings object providing ``char_dim``, ``syll_dim``,
            ``max_sentence_length``, ``max_word_length``, ``max_syll_num``,
            ``keep_prob`` and ``rnn_dim``.
        word_embed: word embeddings.
        sent_len: sentence lengths for the word-level encoder.
        char_embed: character embeddings.
        word_len: per-word character counts.
        syll_embed: syllable embeddings.
        syll_len: per-word syllable counts.
        n_unit: output size passed to ``task_specific_attention``.
        is_training: forwarded to ``layers.dropout``.

    Returns:
        The attention-pooled, dropout-regularized sentence encoding,
        documented by the original author as ``[batch, n_unit]``.
    """
    char_dim = config.char_dim
    syll_dim = config.syll_dim
    max_sent_len = config.max_sentence_length
    max_word_len = config.max_word_length
    max_syll_num = config.max_syll_num
    keep_prob = config.keep_prob
    rnn_dim = config.rnn_dim

    def stacked_gru(num_units, depth):
        # One GRUCell object per layer. ``[GRUCell(n)] * depth`` would
        # repeat a single object, which makes every layer share weights (or
        # raises) on TensorFlow releases where cells own their variables.
        return MultiRNNCell([GRUCell(num_units) for _ in range(depth)])

    with tf.variable_scope('syll_rnn') as scope:
        cell_stack_count = 2
        # Separate stacks per direction, for the same reason as above.
        syll_cell_fw = stacked_gru(syll_dim, cell_stack_count)
        syll_cell_bw = stacked_gru(syll_dim, cell_stack_count)
        # Fold the sentence axis into the batch so every word is one
        # sequence of syllables.
        syll_embed = tf.cast(
            tf.reshape(syll_embed, [-1, max_syll_num, syll_dim]),
            tf.float32)
        syll_len = tf.reshape(syll_len, [-1])
        _, syll_rnn_embed = bidirectional_rnn(
            syll_cell_fw, syll_cell_bw, syll_embed, syll_len, scope=scope)
        # NOTE(review): the second return value of bidirectional_rnn is
        # reshaped as one tensor with 2 * cell_stack_count * syll_dim
        # features per word -- confirm the helper's state layout.
        syll_rnn_embed = tf.reshape(
            syll_rnn_embed,
            [-1, max_sent_len, syll_dim * 2 * cell_stack_count])

    with tf.variable_scope('char_rnn') as scope:
        cell_stack_count = 2
        char_cell_fw = stacked_gru(char_dim, cell_stack_count)
        char_cell_bw = stacked_gru(char_dim, cell_stack_count)
        # Same folding as above, for characters.
        char_embed = tf.cast(
            tf.reshape(char_embed, [-1, max_word_len, char_dim]),
            tf.float32)
        word_len = tf.reshape(word_len, [-1])
        _, char_rnn_embed = bidirectional_rnn(
            char_cell_fw, char_cell_bw, char_embed, word_len, scope=scope)
        char_rnn_embed = tf.reshape(
            char_rnn_embed,
            [-1, max_sent_len, char_dim * 2 * cell_stack_count])

    # Per-word feature vector: word embedding + char and syllable summaries.
    word_char_concat = tf.concat(
        [word_embed, char_rnn_embed, syll_rnn_embed], axis=2)

    with tf.variable_scope('embedding'):
        word_char_embed = fully_connected(
            word_char_concat, rnn_dim, use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))

    with tf.variable_scope('dropout'):
        word_char_embed = layers.dropout(
            word_char_embed, keep_prob=keep_prob, is_training=is_training)

    with tf.variable_scope('encoder') as scope:
        encoder_cell_fw = stacked_gru(rnn_dim, 3)
        encoder_cell_bw = stacked_gru(rnn_dim, 3)
        encoder_output, _ = bidirectional_rnn(
            encoder_cell_fw, encoder_cell_bw, word_char_embed, sent_len,
            scope=scope)

    with tf.variable_scope('attention') as scope:
        attn_sum_output = task_specific_attention(
            encoder_output, n_unit, scope=scope)

    with tf.variable_scope('dropout'):
        attn_sum_output = layers.dropout(
            attn_sum_output, keep_prob=keep_prob, is_training=is_training)

    return attn_sum_output