Exemple #1
0
def positional_signal(hidden_size: int,
                      length: int,
                      min_timescale: float = 1.0,
                      max_timescale: float = 1e4):
    """
    Helper function, constructing basic positional encoding.
    The code is partially based on implementation from Tensor2Tensor library
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py
    """

    if hidden_size % 2 != 0:
        raise ValueError(
            f"The hidden dimension of the model must be divisible by 2."
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    log_timescale_increment = K.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         (num_timescales - 1)),
        dtype=K.floatx())
    inv_timescales = (min_timescale * K.exp(
        K.arange(num_timescales, dtype=K.floatx()) * -log_timescale_increment))
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    return K.expand_dims(signal, axis=0)
Exemple #2
0
 def call(self, inputs, **kwargs):
     length = K.shape(inputs[0])[1] + K.shape(inputs[1])[1]
     inputs = K.tile(
         K.expand_dims(K.arange(length - 1, -1, -1, dtype=K.floatx()), axis=0),
         [K.shape(inputs[0])[0], 1],
     )
     if self.clamp_len is not None:
         inputs = K.clip(inputs, min_value=0, max_value=self.clamp_len)
     inputs = K.expand_dims(inputs, axis=-1)
     output_dim = K.cast(self.output_dim, K.floatx())
     ranges = K.expand_dims(K.arange(0.0, self.output_dim, 2.0), axis=0) / output_dim
     inverse = 1.0 / K.pow(10000.0, ranges)
     positions = inputs * inverse
     return K.concatenate([K.sin(positions), K.cos(positions)], axis=-1)
Exemple #3
0
 def call(self, x, **kwargs):
     if (self.size is None) or (self.mode == 'sum'):
         self.size = int(x.shape[-1])
     batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
     position_j = 1. / K.pow(
         10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
     position_j = K.expand_dims(position_j, 0)
     # K.arange不支持变长,只好用这种方法生成
     position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
     position_i = K.expand_dims(position_i, 2)
     position_ij = K.dot(position_i, position_j)
     position_ij = K.concatenate(
         [K.cos(position_ij), K.sin(position_ij)], 2)
     if self.mode == 'sum':
         return position_ij + x
     elif self.mode == 'concat':
         return K.concatenate([position_ij, x], 2)
Exemple #4
0
    def call(self, x, mask=None):
        if (self.size == None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])

        position_j = 1. / \
                     K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
        position_j = K.expand_dims(position_j, 0)

        position_i = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        position_ij = K.dot(position_i, position_j)
        outputs = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)

        if self.mode == 'sum':
            if self.scale:
                outputs = outputs * self.size**0.5
            return x + outputs
        elif self.mode == 'concat':
            return K.concatenate([outputs, x], 2)