Code Example #1
    def attention(self, embed, query):
        """
        Additive attention: score each position with u^T tanh(W [embed; query] + b),
        softmax the scores over the sequence, and pool `query` by the weights.

        :param embed: float tensor of shape [batch, max_len, hp.num_units]
        :param query: float tensor of shape [batch, max_len, hp.num_units]
        :return: layer-normalized pooled tensor of shape [batch, hp.num_units]
        """
        # variable_scope (not name_scope) is used here because tf.name_scope
        # does not affect the names seen by tf.get_variable.
        with tf.variable_scope("attention"):
            w = tf.get_variable(name="attention_w",
                                shape=[2 * hp.num_units, hp.attention_size],
                                dtype=tf.float32)
            b = tf.get_variable(name="attention_b",
                                shape=[hp.attention_size],
                                dtype=tf.float32)
            u = tf.get_variable(name="attention_u",
                                shape=[hp.attention_size, 1],
                                dtype=tf.float32)
            # Concatenate embed and query features and score every position.
            value = tf.concat([embed, query], axis=-1)
            value = tf.reshape(value, [-1, 2 * hp.num_units])
            attention = tf.matmul(tf.tanh(tf.matmul(value, w) + b), u)
            # Normalize the scores across the sequence dimension.
            attention = tf.reshape(attention, shape=[-1, self.max_len])
            attention = tf.nn.softmax(attention, axis=-1)
            # Broadcast the weights over the feature dimension and pool `query`.
            attention = tf.tile(tf.expand_dims(attention, axis=-1),
                                multiples=[1, 1, hp.num_units])
            output = tf.reduce_sum(attention * query, axis=1)
            output = layer_normalize(output)
            return output
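
For concreteness, here is a self-contained sketch of the same additive-attention computation that runs under TensorFlow 1.x. The `hp` class and `layer_normalize` function below are hypothetical stand-ins for the project's real definitions, and the method is condensed into a standalone function so the snippet runs on its own:

import numpy as np
import tensorflow as tf


class hp:  # hypothetical stand-in for the hyperparameter module
    num_units = 8
    attention_size = 4


def layer_normalize(x, epsilon=1e-6):
    # Simplified layer norm over the last axis (no learned gain or bias).
    mean = tf.reduce_mean(x, axis=-1, keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keep_dims=True)
    return (x - mean) * tf.rsqrt(variance + epsilon)


def attention(embed, query, max_len):
    # Same computation as Example #1, without the enclosing class.
    with tf.variable_scope("attention"):
        w = tf.get_variable("attention_w", [2 * hp.num_units, hp.attention_size], tf.float32)
        b = tf.get_variable("attention_b", [hp.attention_size], tf.float32)
        u = tf.get_variable("attention_u", [hp.attention_size, 1], tf.float32)
        value = tf.reshape(tf.concat([embed, query], axis=-1), [-1, 2 * hp.num_units])
        scores = tf.reshape(tf.matmul(tf.tanh(tf.matmul(value, w) + b), u), [-1, max_len])
        weights = tf.tile(tf.expand_dims(tf.nn.softmax(scores, axis=-1), axis=-1),
                          [1, 1, hp.num_units])
        return layer_normalize(tf.reduce_sum(weights * query, axis=1))


embed = tf.placeholder(tf.float32, [None, 5, hp.num_units])
query = tf.placeholder(tf.float32, [None, 5, hp.num_units])
pooled = attention(embed, query, max_len=5)  # -> [batch, hp.num_units]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randn(2, 5, hp.num_units).astype(np.float32)
    print(sess.run(pooled, {embed: x, query: x}).shape)  # (2, 8)

One detail worth noting: after tf.expand_dims the weights have shape [batch, max_len, 1], so the tf.tile is only an explicit broadcast; `weights * query` would produce the same result through implicit broadcasting.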
Code Example #2
    def multi_dense_layer(inputs):
        """
        Two-layer perceptron head: T*T*channel -> dense_size -> 2.

        :param inputs: float tensor of shape [batch, T, T, channel]
        :return: (class probabilities of shape [batch, 2],
                  predicted labels of shape [batch])
        """
        # Flatten the spatial and channel dimensions into one feature vector.
        _, width, height, channel = inputs.get_shape().as_list()
        size = width * height * channel
        inputs = tf.reshape(inputs, shape=[-1, size])
        # Hidden layer with layer normalization.
        with tf.variable_scope("dense_layer"):
            w = tf.get_variable(name='w', dtype=tf.float32, shape=[size, hp.dense_size])
            b = tf.get_variable(name='b', dtype=tf.float32, shape=[hp.dense_size])
            outputs = layer_normalize(tf.matmul(inputs, w) + b)

        # Output layer: two-way softmax over the logits.
        with tf.variable_scope("logit_layer"):
            w = tf.get_variable(name='w', dtype=tf.float32, shape=[hp.dense_size, 2])
            b = tf.get_variable(name='b', dtype=tf.float32, shape=[2])
            outputs = tf.nn.softmax(tf.matmul(outputs, w) + b, axis=-1)
        # Hard predictions from the probability distribution.
        pre_y = tf.cast(tf.argmax(outputs, axis=-1), dtype=tf.int32)
        return outputs, pre_y
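
Likewise, a self-contained driver for this head under TensorFlow 1.x; `hp.dense_size` and `layer_normalize` are again hypothetical stand-ins, and multi_dense_layer is restated so the snippet runs on its own:

import numpy as np
import tensorflow as tf


class hp:  # hypothetical stand-in for the hyperparameter module
    dense_size = 16


def layer_normalize(x, epsilon=1e-6):
    # Simplified layer norm over the last axis (no learned gain or bias).
    mean = tf.reduce_mean(x, axis=-1, keep_dims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=-1, keep_dims=True)
    return (x - mean) * tf.rsqrt(variance + epsilon)


def multi_dense_layer(inputs):
    # Same computation as Example #2, condensed.
    _, width, height, channel = inputs.get_shape().as_list()
    size = width * height * channel
    inputs = tf.reshape(inputs, shape=[-1, size])
    with tf.variable_scope("dense_layer"):
        w = tf.get_variable('w', [size, hp.dense_size], tf.float32)
        b = tf.get_variable('b', [hp.dense_size], tf.float32)
        outputs = layer_normalize(tf.matmul(inputs, w) + b)
    with tf.variable_scope("logit_layer"):
        w = tf.get_variable('w', [hp.dense_size, 2], tf.float32)
        b = tf.get_variable('b', [2], tf.float32)
        outputs = tf.nn.softmax(tf.matmul(outputs, w) + b, axis=-1)
    return outputs, tf.cast(tf.argmax(outputs, axis=-1), tf.int32)


inputs = tf.placeholder(tf.float32, [None, 4, 4, 3])  # [batch, T, T, channel]
probs, pre_y = multi_dense_layer(inputs)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randn(2, 4, 4, 3).astype(np.float32)
    p, y = sess.run([probs, pre_y], {inputs: x})
    print(p.shape, y.shape)  # (2, 2) (2,)

One design note: the head returns softmax probabilities rather than logits. If it were trained with tf.nn.softmax_cross_entropy_with_logits, the pre-softmax logits would need to be returned instead.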