Example 1
    def _build_model_op(self):
        with tf.variable_scope("bi_directional_rnn"):
            # forward and backward cells for the bi-directional encoder
            cell_fw = self._create_single_rnn_cell(self.cfg["num_units"])
            cell_bw = self._create_single_rnn_cell(self.cfg["num_units"])
            if self.cfg["use_residual"]:
                # project the embeddings to num_units so the RNN input size
                # matches the cell size when residual connections are enabled
                self.word_emb = tf.layers.dense(self.word_emb,
                                                units=self.cfg["num_units"],
                                                use_bias=False,
                                                name="word_input_project")
                if self.cfg["use_chars"]:
                    self.chars_emb = tf.layers.dense(
                        self.chars_emb,
                        units=self.cfg["num_units"],
                        use_bias=False,
                        name="chars_input_project")

            rnn_outs, _ = bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                self.word_emb,
                sequence_length=self.seq_len,
                dtype=tf.float32,
                scope="bi_rnn")
            rnn_outs = tf.concat(rnn_outs, axis=-1)
            print("Bi-directional RNN output shape on word: {}".format(
                rnn_outs.get_shape().as_list()))
            if self.cfg["use_chars"]:
                # reuse the same bi_rnn weights for the character-level inputs
                tf.get_variable_scope().reuse_variables()
                chars_rnn_outs, _ = bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    self.chars_emb,
                    dtype=tf.float32,
                    sequence_length=self.seq_len,
                    scope="bi_rnn")
                chars_rnn_outs = tf.concat(chars_rnn_outs, axis=-1)
                print("Bi-directional RNN output shape on chars: {}".format(
                    chars_rnn_outs.get_shape().as_list()))
                # merge word-level and character-level representations
                rnn_outs = rnn_outs + chars_rnn_outs
            rnn_outs = layer_normalize(rnn_outs)

        with tf.variable_scope("multi_head_attention"):
            attn_outs = multi_head_attention(rnn_outs,
                                             rnn_outs,
                                             self.cfg["num_heads"],
                                             self.cfg["attention_size"],
                                             drop_rate=self.attn_drop_rate,
                                             is_train=self.is_train)
            if self.cfg["use_residual"]:
                attn_outs = attn_outs + rnn_outs  # residual connection
            attn_outs = layer_normalize(attn_outs)  # layer norm
            print("multi-heads attention output shape: {}".format(
                attn_outs.get_shape().as_list()))

        with tf.variable_scope("projection"):
            self.logits = tf.layers.dense(attn_outs,
                                          units=self.tag_vocab_size,
                                          use_bias=True)
            print("logits shape: {}".format(self.logits.get_shape().as_list()))
Example 2
	def _build_model_op(self):
		with tf.variable_scope("bi_directional_rnn"):
			cell_fw = self._create_rnn_cell()
			cell_bw = self._create_rnn_cell()
			# either a stacked (multi-layer) or a single-layer bi-directional RNN over the word embeddings
			if self.cfg["use_stack_rnn"]:
				rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
															   sequence_length=self.seq_len)
			else:
				rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
														 dtype=tf.float32)
			rnn_outs = tf.concat(rnn_outs, axis=-1)
			rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
			if self.cfg["use_residual"]:
				# project word embeddings to 2 * num_units so they match the
				# concatenated forward/backward outputs before the residual sum
				word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
				rnn_outs = rnn_outs + word_project
			outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
			# print("rnn output shape: {}".format(outputs.get_shape().as_list()))

		if self.cfg["use_attention"] == "self_attention":
			with tf.variable_scope("self_attention"):
				attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
												 drop_rate=self.drop_rate, is_train=self.is_train)
				if self.cfg["use_residual"]:
					attn_outs = attn_outs + outputs
				outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
				print("self-attention output shape: {}".format(outputs.get_shape().as_list()))

		elif self.cfg["use_attention"] == "normal_attention":
			with tf.variable_scope("normal_attention"):
				# convert to time-major ([max_time, batch, dim]) for the attention cell
				context = tf.transpose(outputs, [1, 0, 2])
				p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
				p_context = tf.transpose(p_context, [1, 0, 2])
				attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
				attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
										   dtype=tf.float32)
				# back to batch-major: [batch, max_time, dim]
				outputs = tf.transpose(attn_outs, [1, 0, 2])
				print("attention output shape: {}".format(outputs.get_shape().as_list()))

		with tf.variable_scope("project"):
			self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
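Example 2 follows the same structure as Example 1 but optionally stacks the bi-directional RNN (stack_bidirectional_dynamic_rnn lives in tf.contrib.rnn in TF 1.x) and switches between multi-head self-attention and an AttentionCell-based attention decoder via cfg["use_attention"]. The shared multi_head_attention helper is not shown in either example; below is a hedged sketch of a standard scaled dot-product multi-head attention with the same call signature, as an assumption of what it roughly does (the original may add masking or handle residuals differently, and the projection names here are hypothetical).

# Hypothetical sketch of multi_head_attention (scaled dot-product attention,
# TF 1.x); the repository's actual helper may differ in details.
import tensorflow as tf


def multi_head_attention(queries, keys, num_heads, attention_size, drop_rate=0.0, is_train=True):
    # queries, keys: [batch, time, dim]; returns [batch, time, attention_size].
    # attention_size is assumed to be divisible by num_heads.
    with tf.variable_scope("multi_head_attention", reuse=tf.AUTO_REUSE):
        query = tf.layers.dense(queries, attention_size, use_bias=False, name="query_project")
        key = tf.layers.dense(keys, attention_size, use_bias=False, name="key_project")
        value = tf.layers.dense(keys, attention_size, use_bias=False, name="value_project")
        # split the feature dimension into heads: [batch * num_heads, time, attention_size / num_heads]
        query = tf.concat(tf.split(query, num_heads, axis=2), axis=0)
        key = tf.concat(tf.split(key, num_heads, axis=2), axis=0)
        value = tf.concat(tf.split(value, num_heads, axis=2), axis=0)
        # scaled dot-product attention weights, with dropout on the weights
        scores = tf.matmul(query, key, transpose_b=True) / ((attention_size // num_heads) ** 0.5)
        weights = tf.nn.softmax(scores)
        weights = tf.layers.dropout(weights, rate=drop_rate, training=is_train)
        outputs = tf.matmul(weights, value)
        # merge the heads back into [batch, time, attention_size]
        return tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)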