def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_single_rnn_cell(self.cfg["num_units"])
        cell_bw = self._create_single_rnn_cell(self.cfg["num_units"])
        if self.cfg["use_residual"]:
            # project word (and char) inputs to num_units when residual connections are enabled
            self.word_emb = tf.layers.dense(self.word_emb, units=self.cfg["num_units"], use_bias=False,
                                            name="word_input_project")
            if self.cfg["use_chars"]:
                self.chars_emb = tf.layers.dense(self.chars_emb, units=self.cfg["num_units"], use_bias=False,
                                                 name="chars_input_project")
        rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                dtype=tf.float32, scope="bi_rnn")
        rnn_outs = tf.concat(rnn_outs, axis=-1)  # concat forward and backward outputs: (B, T, 2 * num_units)
        print("Bi-directional RNN output shape on word: {}".format(rnn_outs.get_shape().as_list()))
        if self.cfg["use_chars"]:
            # run the same bi-RNN (shared weights via scope reuse) over the char-level representations
            tf.get_variable_scope().reuse_variables()
            chars_rnn_outs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.chars_emb, dtype=tf.float32,
                                                          sequence_length=self.seq_len, scope="bi_rnn")
            chars_rnn_outs = tf.concat(chars_rnn_outs, axis=-1)
            print("Bi-directional RNN output shape on chars: {}".format(chars_rnn_outs.get_shape().as_list()))
            rnn_outs = rnn_outs + chars_rnn_outs  # merge word- and char-level RNN outputs by addition
        rnn_outs = layer_normalize(rnn_outs)

    with tf.variable_scope("multi_head_attention"):
        attn_outs = multi_head_attention(rnn_outs, rnn_outs, self.cfg["num_heads"], self.cfg["attention_size"],
                                         drop_rate=self.attn_drop_rate, is_train=self.is_train)
        if self.cfg["use_residual"]:
            attn_outs = attn_outs + rnn_outs
        attn_outs = layer_normalize(attn_outs)  # residual connection and layer norm
        print("multi-head attention output shape: {}".format(attn_outs.get_shape().as_list()))

    with tf.variable_scope("projection"):
        # per-token projection onto the tag vocabulary
        self.logits = tf.layers.dense(attn_outs, units=self.tag_vocab_size, use_bias=True)
        print("logits shape: {}".format(self.logits.get_shape().as_list()))
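layer_normalize and multi_head_attention are project-level helpers that are not shown in this snippet. For reference, a minimal TensorFlow 1.x sketch of what they might look like follows; the signatures are inferred from the call sites above, while everything else (variable names, the scaled dot-product formulation) is an assumption rather than the project's actual implementation.

    import tensorflow as tf

    def layer_normalize(inputs, epsilon=1e-6, scope="layer_norm"):
        # Hypothetical sketch: standard layer normalization over the last dimension.
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            hidden_size = inputs.get_shape().as_list()[-1]
            scale = tf.get_variable("scale", [hidden_size], initializer=tf.ones_initializer())
            bias = tf.get_variable("bias", [hidden_size], initializer=tf.zeros_initializer())
            mean, variance = tf.nn.moments(inputs, axes=[-1], keep_dims=True)
            return (inputs - mean) * tf.rsqrt(variance + epsilon) * scale + bias

    def multi_head_attention(queries, keys, num_heads, attention_size, drop_rate=0.0, is_train=True,
                             scope="multi_head_attention"):
        # Hypothetical sketch: scaled dot-product attention split into num_heads heads.
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            q = tf.layers.dense(queries, attention_size, use_bias=False, name="q_project")
            k = tf.layers.dense(keys, attention_size, use_bias=False, name="k_project")
            v = tf.layers.dense(keys, attention_size, use_bias=False, name="v_project")
            # split the attention dimension into heads and fold the heads into the batch axis
            q_ = tf.concat(tf.split(q, num_heads, axis=-1), axis=0)  # (h * B, T, d / h)
            k_ = tf.concat(tf.split(k, num_heads, axis=-1), axis=0)
            v_ = tf.concat(tf.split(v, num_heads, axis=-1), axis=0)
            logits = tf.matmul(q_, k_, transpose_b=True) / ((attention_size // num_heads) ** 0.5)
            weights = tf.layers.dropout(tf.nn.softmax(logits), rate=drop_rate, training=is_train)
            outputs = tf.matmul(weights, v_)  # (h * B, T, d / h)
            # restore the original batch axis: (B, T, attention_size)
            return tf.concat(tf.split(outputs, num_heads, axis=0), axis=-1)

Note that for the residual addition attn_outs + rnn_outs to be valid, attention_size must equal the concatenated bi-RNN depth (2 * num_units), and a production helper would also mask the padded time steps using seq_len, which this sketch omits.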
def _build_model_op(self):
    with tf.variable_scope("bi_directional_rnn"):
        cell_fw = self._create_rnn_cell()
        cell_bw = self._create_rnn_cell()
        if self.cfg["use_stack_rnn"]:
            rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
                                                           sequence_length=self.seq_len)
        else:
            rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
                                                     dtype=tf.float32)
        rnn_outs = tf.concat(rnn_outs, axis=-1)  # (B, T, 2 * num_units)
        rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
        if self.cfg["use_residual"]:
            # project the inputs to 2 * num_units so they can be added to the bi-RNN outputs
            word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
            rnn_outs = rnn_outs + word_project
        outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
        # print("rnn output shape: {}".format(outputs.get_shape().as_list()))

    if self.cfg["use_attention"] == "self_attention":
        with tf.variable_scope("self_attention"):
            attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
                                             drop_rate=self.drop_rate, is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + outputs
            outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
            print("self-attention output shape: {}".format(outputs.get_shape().as_list()))

    elif self.cfg["use_attention"] == "normal_attention":
        with tf.variable_scope("normal_attention"):
            # AttentionCell consumes time-major inputs, so transpose to (T, B, D)
            context = tf.transpose(outputs, [1, 0, 2])
            p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
            p_context = tf.transpose(p_context, [1, 0, 2])
            attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
            attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
                                       dtype=tf.float32)
            outputs = tf.transpose(attn_outs, [1, 0, 2])  # back to batch major
            print("attention output shape: {}".format(outputs.get_shape().as_list()))

    with tf.variable_scope("project"):
        self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
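Both variants assume TensorFlow 1.x and that the un-prefixed RNN helpers are available at module level; AttentionCell, layer_normalize, and multi_head_attention are project-defined. A plausible set of module-level imports (the project-helper module path below is an assumption) would be:

    import tensorflow as tf
    from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn, dynamic_rnn
    from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn

    # Project-level helpers; the module path is hypothetical.
    # from models.nns import AttentionCell, layer_normalize, multi_head_attention

Note that stack_bidirectional_dynamic_rnn expects lists of forward and backward cells, so _create_rnn_cell is presumably configured to return a list when use_stack_rnn is set.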