def attention(self, embed, query):
    """
    Attention mechanism: score each time step of `query` against `embed`
    and return the attention-weighted sum over `query`.
    :param embed: [batch, max_len, hp.num_units]
    :param query: [batch, max_len, hp.num_units]
    :return: [batch, hp.num_units]
    """
    with tf.name_scope("attention"):
        w = tf.get_variable(name="attention_w", shape=[2 * hp.num_units, hp.attention_size], dtype=tf.float32)
        b = tf.get_variable(name="attention_b", shape=[hp.attention_size], dtype=tf.float32)
        u = tf.get_variable(name="attention_u", shape=[hp.attention_size, 1], dtype=tf.float32)
        # Additive attention score per position: u^T * tanh(W * [embed; query] + b)
        value = tf.concat([embed, query], axis=-1)
        value = tf.reshape(value, [-1, 2 * hp.num_units])
        attention = tf.matmul(tf.tanh(tf.matmul(value, w) + b), u)
        attention = tf.reshape(attention, shape=[-1, self.max_len])
        attention = tf.nn.softmax(attention, axis=-1)
        # Broadcast the weights over the feature dimension, then pool over time.
        attention = tf.tile(tf.expand_dims(attention, axis=-1), multiples=[1, 1, hp.num_units])
        output = tf.reduce_sum(attention * query, axis=1)
        output = layer_normalize(output)
        return output
def multi_dense_layer(inputs):
    """
    Multi-layer perceptron head: T*T*channel -> dense_size -> 2
    :param inputs: [batch, T, T, channel]
    :return: class probabilities [batch, 2] and predicted labels [batch]
    """
    _, width, height, channel = inputs.get_shape().as_list()
    size = width * height * channel
    inputs = tf.reshape(inputs, shape=[-1, size])
    with tf.variable_scope("dense_layer"):
        w = tf.get_variable(name='w', dtype=tf.float32, shape=[size, hp.dense_size])
        b = tf.get_variable(name='b', dtype=tf.float32, shape=[hp.dense_size])
        outputs = layer_normalize(tf.matmul(inputs, w) + b)
    with tf.variable_scope("logit_layer"):
        w = tf.get_variable(name='w', dtype=tf.float32, shape=[hp.dense_size, 2])
        b = tf.get_variable(name='b', dtype=tf.float32, shape=[2])
        outputs = tf.nn.softmax(tf.matmul(outputs, w) + b, axis=-1)
    pre_y = tf.cast(tf.argmax(outputs, axis=-1), dtype=tf.int32)
    return outputs, pre_y
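

# A minimal shape-check sketch, not part of the model itself. It assumes this
# module already provides `tf` (TensorFlow 1.x), `hp.dense_size`, and
# `layer_normalize`, as implied by the functions above; the placeholder
# dimensions 7 x 7 x 64 are illustrative only and not taken from the project.
if __name__ == "__main__":
    demo_inputs = tf.placeholder(tf.float32, shape=[None, 7, 7, 64], name="demo_inputs")
    demo_probs, demo_pre_y = multi_dense_layer(demo_inputs)
    # Expected static shapes: probabilities [None, 2], predicted labels [None].
    print(demo_probs.get_shape().as_list(), demo_pre_y.get_shape().as_list())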