Example #1
def fast_dot_product_attention(q, k, v, bias, dropout_rate=None, name=None):
    """fast dot-product attention.
    deal with special case(the length of q is equal to 1)

    :param q: A tensor with shape [batch, heads, 1, depth_k]
    :param k: A tensor with shape [batch, heads, length_kv, depth_k]
    :param v: A tensor with shape [batch, heads, length_kv, depth_v]

    :returns: A tensor with shape [batch, heads, 1, depth_v]
    """

    with tf.variable_scope(name,
                           default_name="dot_product_attention",
                           values=[q, k, v]):
        # [batch, num_heads, query_length, memory_length]
        logits = tf.expand_dims(tf.reduce_sum(q * k, axis=3), axis=2)
        if bias is not None:
            logits += bias
        weights = tf.nn.softmax(logits, name="attention_weights")

        if dropout_rate is not None and dropout_rate > 0.0:
            # tf.nn.dropout takes a keep probability in TF 1.x.
            weights = tf.nn.dropout(weights, 1 - dropout_rate)

        # Reshape weights from [batch, heads, 1, length_kv] to
        # [batch, heads, length_kv, 1] so they broadcast against v.
        weights_shape = infer_shape(weights)
        new_shape = weights_shape[:-2]
        new_shape.append(weights_shape[-1])
        new_shape.append(1)
        weights = tf.reshape(weights, new_shape)
        return tf.expand_dims(tf.reduce_sum(weights * v, axis=2), axis=2)
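Every example on this page calls an infer_shape helper that is not shown and assumes import tensorflow as tf under the TensorFlow 1.x API. A minimal sketch of such a helper (an assumption; the original repository's version may differ) returns a list mixing static Python ints with dynamic scalar tensors, so the result can be sliced and passed back to tf.reshape:

import tensorflow as tf

def infer_shape(x):
    # Prefer static dimensions where they are known; fall back to
    # dynamic scalar tensors for dimensions only known at run time.
    x = tf.convert_to_tensor(x)
    static_shape = x.shape.as_list()
    dynamic_shape = tf.shape(x)
    return [static_shape[i] if static_shape[i] is not None
            else dynamic_shape[i]
            for i in range(len(static_shape))]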
Example #2
def restore_variables(checkpoint):
    # Nothing to restore if the directory contains no checkpoint yet.
    if tf.train.latest_checkpoint(checkpoint) is None:
        return tf.no_op("restore_op")

    # Load checkpoints
    tf.logging.info("Loading %s" % checkpoint)
    var_list = tf.train.list_variables(checkpoint)
    reader = tf.train.load_checkpoint(checkpoint)
    values = {}

    for (name, shape) in var_list:
        tensor = reader.get_tensor(name)
        # Strip a possible ":0" suffix so names match tf.trainable_variables().
        name = name.split(":")[0]
        values[name] = tensor

    var_list = tf.trainable_variables()
    ops = []

    for var in var_list:
        name = var.name.split(":")[0]

        if name in values:
            tf.logging.info("Restore %s" % var.name)
            ops.append(tf.assign(var, values[name]))
        else:
            # Variables absent from the checkpoint are zero-initialized.
            tf.logging.info("Initialize %s" % var.name)
            ops.append(tf.assign(var, tf.zeros(infer_shape(var))))

    return tf.group(*ops, name="restore_op")
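A hedged usage sketch, assuming a TF 1.x session; the checkpoint path is purely illustrative:

restore_op = restore_variables("/path/to/checkpoint_dir")

with tf.Session() as sess:
    # Run the default initializers first, then overwrite every
    # variable for which the checkpoint provides a value.
    sess.run(tf.global_variables_initializer())
    sess.run(restore_op)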
Example #3
def combine_heads(x, name=None):
    """ Combine heads

    :param x: A tensor with shape [batch, heads, length, channels]
    :param name: An optional string

    :returns: A tensor with shape [batch, length, heads * channels]
    """

    with tf.name_scope(name, default_name="combine_heads", values=[x]):
        x = tf.transpose(x, [0, 2, 1, 3])  # [batch, length, heads, channels]
        x_shape = infer_shape(x)
        a, b = x_shape[-2:]
        return tf.reshape(x, x_shape[:-2] + [a * b])
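A quick shape check for combine_heads (the values are illustrative only):

x = tf.zeros([2, 8, 10, 64])  # [batch=2, heads=8, length=10, channels=64]
y = combine_heads(x)
print(y.shape)                # (2, 10, 512)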
Example #4
def linear(input_data, output_size, bias=True, dtype=None, scope=None):
    """
    output = input_data * W + b
    """
    with tf.variable_scope(scope, default_name="linear"):
        input_shape = infer_shape(input_data)
        input_size = input_shape[-1]
        output_shape = tf.concat([input_shape[:-1], [output_size]], axis=0)

        W = tf.get_variable("W", shape=[input_size, output_size], dtype=dtype)
        output = tf.matmul(tf.reshape(input_data, [-1, input_size]), W)

        if bias:
            # Use a distinct name to avoid shadowing the `bias` argument.
            b = tf.get_variable("b", shape=[output_size], dtype=dtype)
            output = output + b

        return tf.reshape(output, output_shape)
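An illustrative call, assuming a statically known channel dimension (linear builds W from the last dimension of its input); the scope name here is hypothetical:

x = tf.zeros([16, 20, 512])             # [batch, length, channels]
y = linear(x, 1024, scope="ffn_inner")  # expected shape: [16, 20, 1024]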
Example #5
def split_heads(x, num_heads, name=None):
    """ Split heads

    :param x: A tensor with shape [batch, length, channels]
    :param num_heads: An integer
    :param name: An optional string

    :returns: A tensor with shape [batch, heads, length, channels / heads]
    """
    with tf.name_scope(name, default_name="split_heads", values=[x]):
        x_shape = infer_shape(x)
        m = x_shape[-1]
        if isinstance(m, int) and isinstance(num_heads, int):
            # Check divisibility only when both values are known statically.
            assert m % num_heads == 0
        return tf.transpose(
            tf.reshape(x, x_shape[:-1] + [num_heads, m // num_heads]),
            [0, 2, 1, 3])
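split_heads and combine_heads (Example #3) invert each other; a round-trip sketch with illustrative shapes:

x = tf.zeros([2, 10, 512])  # [batch, length, channels]
h = split_heads(x, 8)       # [2, 8, 10, 64]
y = combine_heads(h)        # [2, 10, 512]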
Example #6
def layer_norm(input_data, epsilon=1e-6, dtype=None, scope=None):
    """Layer normalization over the last dimension of input_data."""
    with tf.variable_scope(scope, default_name="layer_norm"):
        input_size = infer_shape(input_data)[-1]

        scale = tf.get_variable("scale",
                                shape=[input_size],
                                initializer=tf.ones_initializer())
        bias = tf.get_variable("bias",
                               shape=[input_size],
                               initializer=tf.zeros_initializer)

        # Normalize over the last (feature) dimension.
        mean = tf.reduce_mean(input_data, axis=-1, keepdims=True)
        variance = tf.reduce_mean(tf.square(input_data - mean),
                                  axis=-1, keepdims=True)

        input_norm = (input_data - mean) * tf.rsqrt(variance + epsilon)
        output = input_norm * scale + bias

        return output
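layer_norm is typically combined with a residual connection around a sublayer. A hedged sketch of a post-norm feed-forward block built from Examples #4 and #6; the function name and scope names are illustrative, and the channel dimension is assumed static:

def ffn_sublayer(x, hidden_size):
    # Position-wise feed-forward block with a residual connection
    # and layer normalization (post-norm variant).
    input_size = infer_shape(x)[-1]
    h = tf.nn.relu(linear(x, hidden_size, scope="inner"))
    y = linear(h, input_size, scope="outer")
    return layer_norm(x + y, scope="ffn_norm")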