import tensorflow as tf
from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn

# NOTE: project-level helpers (lstm_cell, resolve_optimizer,
# variable_len_batch_mean, attention_unit, l2_regularized_loss, and the
# attention-snapshot utilities) are defined elsewhere in the codebase.


def model_fn(self, features, labels, mode, params):
    # Vanilla LSTM baseline: one LSTM over the whole sentence, classifying
    # from the final hidden state.
    _, final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell(**params, mode=mode),
        inputs=features["sentence_emb"],
        sequence_length=features["sentence_len"],
        dtype=tf.float32,
    )
    logits = tf.layers.dense(
        inputs=final_states.h,
        units=params["_n_out_classes"],
        kernel_initializer=params["initializer"],
        bias_initializer=params["initializer"],
    )
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
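# The `lstm_cell` helper is not shown in this section. A minimal sketch
# consistent with its call sites (keyword params plus the Estimator `mode`,
# extra params ignored) could look like the following; the project's real
# helper may wrap the cell differently:
def lstm_cell(hidden_units, initializer, keep_prob=1.0, mode=None, **_):
    cell = tf.nn.rnn_cell.LSTMCell(
        num_units=hidden_units, initializer=initializer
    )
    if mode == tf.estimator.ModeKeys.TRAIN and keep_prob < 1.0:
        # Cell-level dropout only while training.
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
    return cell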
def model_fn(self, features, labels, mode, params):
    # TD-LSTM: separate LSTMs run over the left and right contexts, and the
    # classifier sees the concatenation of their final hidden states.
    with tf.variable_scope("left_lstm"):
        _, final_states_left = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["left_emb"],
            sequence_length=features["left_len"],
            dtype=tf.float32,
        )
    with tf.variable_scope("right_lstm"):
        _, final_states_right = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["right_emb"],
            sequence_length=features["right_len"],
            dtype=tf.float32,
        )
    concatenated_final_states = tf.concat(
        [final_states_left.h, final_states_right.h], axis=1
    )
    logits = tf.layers.dense(
        inputs=concatenated_final_states,
        units=params["_n_out_classes"],
        kernel_initializer=params["initializer"],
        bias_initializer=params["initializer"],
    )
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
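# `resolve_optimizer` is likewise assumed. A plausible sketch that maps a
# params entry to a stock TF optimizer and ignores the rest of **params;
# the "optimizer" and "learning_rate" key names are assumptions:
def resolve_optimizer(optimizer="adam", learning_rate=0.01, **_):
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
        "adagrad": tf.train.AdagradOptimizer,
    }
    return optimizers[optimizer](learning_rate)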
def model_fn(self, features, labels, mode, params):
    # Deep memory network (MemNet): the context embeddings form an external
    # memory that is attended over for n_hops, with each hop's output
    # feeding the next hop as the query vector.
    target_offset = tf.cast(features["target_offset"], tf.int32)
    memory = features["context_emb"]
    max_ctxt_len = tf.shape(memory)[1]
    context_locations = get_absolute_distance_vector(
        target_locs=target_offset,
        seq_lens=features["context_len"],
        max_seq_len=max_ctxt_len,
    )
    # Initial query: the average of the target's word embeddings.
    v_aspect = variable_len_batch_mean(
        input_tensor=features["target_emb"],
        seq_lengths=features["target_len"],
        op_name="target_embedding_avg",
    )
    attn_snapshots = create_snapshots_container(
        shape_like=features["context_ids"], n_snaps=params["n_hops"]
    )
    hop_number = tf.constant(1)
    initial_hop_inputs = (hop_number, memory, v_aspect, attn_snapshots)

    def condition(hop_num, ext_memory, input_vec, attn_snapshots):
        return tf.less_equal(hop_num, params["n_hops"])

    def hop(hop_num, ext_memory, input_vec, attn_snapshots):
        # Inject position information into the memory; location model 3
        # adds its vectors, the others gate the memory multiplicatively.
        location_vector_model_fn = get_location_vector_model(
            model_num=params["location_model"]
        )
        v_loc = location_vector_model_fn(
            locs=context_locations,
            seq_lens=features["context_len"],
            emb_dim=params["_embedding_dim"],
            hop=hop_num,
            init=params["initializer"],
        )
        if params["location_model"] == 3:
            ext_memory = ext_memory + v_loc
        else:
            ext_memory = tf.multiply(memory, v_loc)
        # The linear transform and attention layers share their weights
        # across hops via AUTO_REUSE.
        with tf.variable_scope("linear_layer", reuse=tf.AUTO_REUSE):
            linear_out = tf.layers.dense(
                inputs=tf.squeeze(input_vec, axis=1),
                units=params["_embedding_dim"],
                activation=None,
                kernel_initializer=params["initializer"],
                bias_initializer=params["initializer"],
            )
        with tf.variable_scope("attention_layer", reuse=tf.AUTO_REUSE):
            attn_out, attn_snapshot = memnet_content_attn_unit(
                seq_lens=features["context_len"],
                memory=ext_memory,
                v_aspect=input_vec,
                emb_dim=params["_embedding_dim"],
                init=params["initializer"],
            )
        attn_snapshots = append_snapshot(
            container=attn_snapshots, new_snap=attn_snapshot, index=hop_num
        )
        # The next hop's query keeps the [batch, 1, dim] shape of v_aspect.
        output_vec = attn_out + linear_out
        output_vec = tf.expand_dims(output_vec, axis=1)
        hop_num = tf.add(hop_num, 1)
        return (hop_num, ext_memory, output_vec, attn_snapshots)

    _, _, final_sentence_rep, attn_snapshots = tf.while_loop(
        cond=condition,
        body=hop,
        loop_vars=initial_hop_inputs,
        shape_invariants=(
            hop_number.get_shape(),
            memory.get_shape(),
            v_aspect.get_shape(),
            tf.TensorShape(dims=[params["n_hops"], None, None, 1]),
        ),
    )
    literals, attn_snapshots = zip_attn_snapshots_with_literals(
        literals=features["context"],
        snapshots=attn_snapshots,
        num_layers=params["n_hops"],
    )
    attn_info = tf.tuple([literals, attn_snapshots])
    generate_attn_heatmap_summary(attn_info)
    final_sentence_rep = tf.squeeze(final_sentence_rep, axis=1)
    logits = tf.layers.dense(
        inputs=final_sentence_rep,
        units=params["_n_out_classes"],
        kernel_initializer=params["initializer"],
        bias_initializer=params["initializer"],
    )
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
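# `variable_len_batch_mean` is used throughout to average over padded,
# variable-length sequences. Judging by its call sites (the result feeds
# attention as a [batch, 1, dim] focus vector), a masked-mean sketch:
def variable_len_batch_mean(input_tensor, seq_lengths, op_name):
    with tf.name_scope(op_name):
        # Zero out padded timesteps, then divide the sum by the true length.
        mask = tf.sequence_mask(
            seq_lengths, maxlen=tf.shape(input_tensor)[1], dtype=tf.float32
        )
        summed = tf.reduce_sum(
            input_tensor * tf.expand_dims(mask, axis=-1), axis=1, keepdims=True
        )
        lengths = tf.reshape(tf.cast(seq_lengths, tf.float32), [-1, 1, 1])
        return summed / tf.maximum(lengths, 1.0)  # [batch, 1, dim]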
def model_fn(self, features, labels, mode, params):
    # LCR-Rot: bi-LSTMs encode the target, left context, and right context;
    # target-to-context attention builds left/right representations, then
    # "rotary" context-to-target attention re-weights the target from each
    # side before everything is concatenated for classification.
    with tf.variable_scope("target_bi_lstm"):
        features["target_emb"] = tf.nn.dropout(
            features["target_emb"], keep_prob=params["keep_prob"]
        )
        target_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=[lstm_cell(**params, mode=mode)],
            cells_bw=[lstm_cell(**params, mode=mode)],
            inputs=features["target_emb"],
            sequence_length=features["target_len"],
            dtype=tf.float32,
        )
    # Pooled target representation used as the initial attention focus.
    r_t = variable_len_batch_mean(
        input_tensor=target_hidden_states,
        seq_lengths=features["target_len"],
        op_name="target_avg_pooling",
    )
    with tf.variable_scope("left_bi_lstm"):
        features["left_emb"] = tf.nn.dropout(
            features["left_emb"], keep_prob=params["keep_prob"]
        )
        left_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=[lstm_cell(**params, mode=mode)],
            cells_bw=[lstm_cell(**params, mode=mode)],
            inputs=features["left_emb"],
            sequence_length=features["left_len"],
            dtype=tf.float32,
        )
    with tf.variable_scope("right_bi_lstm"):
        features["right_emb"] = tf.nn.dropout(
            features["right_emb"], keep_prob=params["keep_prob"]
        )
        right_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=[lstm_cell(**params, mode=mode)],
            cells_bw=[lstm_cell(**params, mode=mode)],
            inputs=features["right_emb"],
            sequence_length=features["right_len"],
            dtype=tf.float32,
        )
    # Target-to-context attention (hidden size doubled by the bi-LSTMs).
    with tf.variable_scope("left_t2c_attn"):
        left_hidden_states = tf.nn.dropout(
            left_hidden_states, keep_prob=params["keep_prob"]
        )
        r_l, left_attn_info = attention_unit(
            h_states=left_hidden_states,
            hidden_units=params["hidden_units"] * 2,
            seq_lengths=features["left_len"],
            attn_focus=r_t,
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["left"],
        )
    with tf.variable_scope("right_t2c_attn"):
        right_hidden_states = tf.nn.dropout(
            right_hidden_states, keep_prob=params["keep_prob"]
        )
        r_r, right_attn_info = attention_unit(
            h_states=right_hidden_states,
            hidden_units=params["hidden_units"] * 2,
            seq_lengths=features["right_len"],
            attn_focus=r_t,
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["right"],
        )
    target_hidden_states = tf.nn.dropout(
        target_hidden_states, keep_prob=params["keep_prob"]
    )
    # Context-to-target attention, focused by each side's representation.
    with tf.variable_scope("left_c2t_attn"):
        r_t_l, left_target_attn_info = attention_unit(
            h_states=target_hidden_states,
            hidden_units=params["hidden_units"] * 2,
            seq_lengths=features["target_len"],
            attn_focus=tf.expand_dims(r_l, axis=1),
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["target"],
        )
    with tf.variable_scope("right_c2t_attn"):
        r_t_r, right_target_attn_info = attention_unit(
            h_states=target_hidden_states,
            hidden_units=params["hidden_units"] * 2,
            seq_lengths=features["target_len"],
            attn_focus=tf.expand_dims(r_r, axis=1),
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["target"],
        )
    generate_attn_heatmap_summary(
        left_attn_info,
        left_target_attn_info,
        right_target_attn_info,
        right_attn_info,
    )
    final_sentence_rep = tf.concat([r_l, r_t_l, r_t_r, r_r], axis=1)
    final_sentence_rep = tf.nn.dropout(
        final_sentence_rep, keep_prob=params["keep_prob"]
    )
    logits = tf.layers.dense(
        inputs=final_sentence_rep,
        units=params["_n_out_classes"],
        kernel_initializer=params["initializer"],
        bias_initializer=params["bias_initializer"],
    )
    loss = l2_regularized_loss(
        labels=labels, logits=logits, l2_weight=params["l2_weight"]
    )
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
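# `attention_unit` (used by this model and the next) is not shown either.
# From its call sites it scores each hidden state against a focus vector and
# returns the attention-weighted summary plus a (literal, weights) pair for
# the heatmap summaries. A sketch under those assumptions; the bilinear
# scoring form and variable names here are guesses, not the project's code:
def attention_unit(
    h_states, hidden_units, seq_lengths, attn_focus, init, bias_init, sp_literal
):
    # Bilinear score per timestep: tanh(h_t . W . focus + b).
    w = tf.get_variable(
        "attention_weights", shape=[hidden_units, hidden_units], initializer=init
    )
    b = tf.get_variable("attention_bias", shape=[1], initializer=bias_init)
    projected = tf.einsum("bth,hk->btk", h_states, w)
    scores = tf.tanh(
        tf.matmul(projected, tf.transpose(attn_focus, perm=[0, 2, 1])) + b
    )  # [batch, time, 1]
    # Mask padding with a large negative before the softmax over time.
    mask = tf.expand_dims(
        tf.sequence_mask(seq_lengths, maxlen=tf.shape(h_states)[1]), axis=-1
    )
    scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e9))
    weights = tf.nn.softmax(scores, axis=1)
    weighted_sum = tf.reduce_sum(weights * h_states, axis=1)  # [batch, hidden]
    return weighted_sum, (sp_literal, weights)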
def model_fn(self, features, labels, mode, params):
    # IAN: context and target are encoded by separate LSTMs, then attend to
    # each other interactively, each using the other's average-pooled hidden
    # states as the attention focus.
    with tf.variable_scope("context_lstm"):
        features["context_emb"] = tf.nn.dropout(
            features["context_emb"], keep_prob=params["keep_prob"]
        )
        # mode is passed through, as in the other model_fns, so any
        # cell-level dropout is active only during training.
        context_hidden_states, _ = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["context_emb"],
            sequence_length=features["context_len"],
            dtype=tf.float32,
        )
        c_avg = variable_len_batch_mean(
            input_tensor=context_hidden_states,
            seq_lengths=features["context_len"],
            op_name="context_avg_pooling",
        )
    with tf.variable_scope("target_lstm"):
        features["target_emb"] = tf.nn.dropout(
            features["target_emb"], keep_prob=params["keep_prob"]
        )
        target_hidden_states, _ = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["target_emb"],
            sequence_length=features["target_len"],
            dtype=tf.float32,
        )
        t_avg = variable_len_batch_mean(
            input_tensor=target_hidden_states,
            seq_lengths=features["target_len"],
            op_name="target_avg_pooling",
        )
    # AUTO_REUSE shares the attention parameters between the two calls.
    with tf.variable_scope("attention_layer", reuse=tf.AUTO_REUSE):
        context_hidden_states = tf.nn.dropout(
            context_hidden_states, keep_prob=params["keep_prob"]
        )
        c_r, ctxt_attn_info = attention_unit(
            h_states=context_hidden_states,
            hidden_units=params["hidden_units"],
            seq_lengths=features["context_len"],
            attn_focus=t_avg,
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["context"],
        )
        target_hidden_states = tf.nn.dropout(
            target_hidden_states, keep_prob=params["keep_prob"]
        )
        t_r, trg_attn_info = attention_unit(
            h_states=target_hidden_states,
            hidden_units=params["hidden_units"],
            seq_lengths=features["target_len"],
            attn_focus=c_avg,
            init=params["initializer"],
            bias_init=params["bias_initializer"],
            sp_literal=features["target"],
        )
    generate_attn_heatmap_summary(trg_attn_info, ctxt_attn_info)
    final_sentence_rep = tf.concat([t_r, c_r], axis=1)
    final_sentence_rep = tf.nn.dropout(
        final_sentence_rep, keep_prob=params["keep_prob"]
    )
    logits = tf.layers.dense(
        inputs=final_sentence_rep,
        units=params["_n_out_classes"],
        activation=tf.nn.tanh,
        kernel_initializer=params["initializer"],
        bias_initializer=params.get("bias_initializer", params["initializer"]),
    )
    loss = l2_regularized_loss(
        labels=labels, logits=logits, l2_weight=params["l2_weight"]
    )
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
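# `l2_regularized_loss`, used by the two attention models above, presumably
# adds an L2 penalty to the standard cross-entropy. A sketch; exactly which
# variables are penalized is an assumption:
def l2_regularized_loss(labels, logits, l2_weight):
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=labels, logits=logits
    )
    # Regularize weight matrices only, leaving biases untouched.
    l2_term = l2_weight * tf.add_n(
        [
            tf.nn.l2_loss(v)
            for v in tf.trainable_variables()
            if "bias" not in v.name.lower()
        ]
    )
    return cross_entropy + l2_term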
def model_fn(self, features, labels, mode, params):
    # TC-LSTM: TD-LSTM plus a target connection; the mean target embedding
    # is concatenated onto every token embedding in both contexts.
    max_left_len = tf.shape(features["left_emb"])[1]
    max_right_len = tf.shape(features["right_emb"])[1]
    with tf.name_scope("target_connection"):
        mean_target_embedding = variable_len_batch_mean(
            input_tensor=features["target_emb"],
            seq_lengths=features["target_len"],
            op_name="target_embedding_avg",
        )
        # Broadcast the [batch, 1, emb] mean over every timestep, then stack
        # and reshape so each token carries [token_emb; target_avg], i.e.
        # 2 * embedding_dim inputs per position.
        features["left_emb"] = tf.stack(
            values=[
                features["left_emb"],
                tf.ones(tf.shape(features["left_emb"])) * mean_target_embedding,
            ],
            axis=2,
        )
        features["left_emb"] = tf.reshape(
            tensor=features["left_emb"],
            shape=[-1, max_left_len, 2 * params["_embedding_dim"]],
        )
        features["right_emb"] = tf.stack(
            values=[
                features["right_emb"],
                tf.ones(tf.shape(features["right_emb"])) * mean_target_embedding,
            ],
            axis=2,
        )
        features["right_emb"] = tf.reshape(
            tensor=features["right_emb"],
            shape=[-1, max_right_len, 2 * params["_embedding_dim"]],
        )
    with tf.variable_scope("left_lstm"):
        _, final_states_left = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["left_emb"],
            sequence_length=features["left_len"],
            dtype=tf.float32,
        )
    with tf.variable_scope("right_lstm"):
        _, final_states_right = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["right_emb"],
            sequence_length=features["right_len"],
            dtype=tf.float32,
        )
    concatenated_final_states = tf.concat(
        [final_states_left.h, final_states_right.h], axis=1
    )
    logits = tf.layers.dense(
        inputs=concatenated_final_states,
        units=params["_n_out_classes"],
        kernel_initializer=params["initializer"],
        bias_initializer=params["initializer"],
    )
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    optimizer = resolve_optimizer(**params)
    return self.make_estimator_spec(
        mode=mode, logits=logits, optimizer=optimizer, loss=loss
    )
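# Hedged usage sketch: once bound to an instance, each model_fn above matches
# the tf.estimator.Estimator model_fn signature. `TCLSTM` and `train_input_fn`
# are hypothetical names; the params keys mirror what the model_fns read,
# with illustrative values only:
model = TCLSTM()
estimator = tf.estimator.Estimator(
    model_fn=model.model_fn,
    params={
        "hidden_units": 200,
        "_embedding_dim": 300,
        "_n_out_classes": 3,
        "initializer": tf.random_uniform_initializer(-0.1, 0.1),
        "keep_prob": 0.8,
        "learning_rate": 0.01,
    },
)
estimator.train(input_fn=train_input_fn, steps=1000)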