Code example #1
    def test_positional_encoding(self):
        position_embedding = model_utils.positional_encoding(
            max_seq_size=4, embedding_dim=2).numpy()

        # Standard sinusoidal encoding with embedding_dim=2: row i is
        # [sin(i), cos(i)] for positions i = 0..3.
        expected_output = np.array([[0., 1.], [0.84147096, 0.5403023],
                                    [0.9092974, -0.41614684],
                                    [0.14112, -0.9899925]])

        self.assertSequenceEqual(position_embedding.shape, [1, 4, 2])
        np.testing.assert_array_almost_equal(position_embedding[0],
                                             expected_output)
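
The expected values above are the standard transformer sinusoidal encoding: with embedding_dim=2 the angle rate is 1, so row i is simply [sin(i), cos(i)]. For reference, below is a minimal sketch of an encoding function that reproduces these values; the actual model_utils.positional_encoding may differ in its internals, but it has to return a [1, max_seq_size, embedding_dim] tensor with these entries for the test to pass.

import numpy as np
import tensorflow as tf


def positional_encoding(max_seq_size, embedding_dim):
  """Sinusoidal positional encoding (Vaswani et al., 2017): [1, seq, dim]."""
  positions = np.arange(max_seq_size)[:, np.newaxis]   # [seq, 1]
  dims = np.arange(embedding_dim)[np.newaxis, :]       # [1, dim]
  # Shared angle rate for each sin/cos pair: 1 / 10000^(2i / dim).
  angle_rates = 1.0 / np.power(
      10000.0, (2 * (dims // 2)) / np.float32(embedding_dim))
  angle_rads = positions * angle_rates                 # [seq, dim]
  angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])    # even columns: sin
  angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])    # odd columns: cos
  return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)
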
Code example #2
    def __init__(self,
                 output_dimension,
                 max_sequence_size,
                 vocab_size,
                 input_embedding_dimension,
                 num_representations=1,
                 use_positional_encoding=False,
                 use_projection_layer=False,
                 mask_zero=False):
        """Initializes the parameteric attention model.

    Args:
      output_dimension: The output dimension of the user representation.
      max_sequence_size: The maximum size of the input sequence.
      vocab_size: The vocabulary size for input tokens/items.
      input_embedding_dimension: The embedding dimension for input tokens/items.
      num_representations: Number of output representations.
      use_positional_encoding: Whether positional encoding is applied or not.
      use_projection_layer: Whether to apply projection before using parametric
        attention. Used for the SUR model to increase the number of paramaters.
      mask_zero: If true, uses zero in sequence as the mask.
    """

        super(SimpleParametricAttention, self).__init__()
        self._input_embedding_dimension = input_embedding_dimension
        self._use_positional_encoding = use_positional_encoding
        self._positional_encoding = model_utils.positional_encoding(
            max_sequence_size, input_embedding_dimension)
        self._output_dimension = output_dimension
        self._attention = tf.keras.layers.Attention(use_scale=True)
        self._num_heads = num_representations
        self._mask_zero = mask_zero

        self._reset_query_head()

        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            input_embedding_dimension,
            mask_zero=mask_zero,
            embeddings_initializer='normal')

        if use_projection_layer and self._num_heads == 1:
            # Linear layer for SUR model (to increase num_parameters).
            self._projection = tf.keras.layers.Dense(
                self._input_embedding_dimension, use_bias=False)
        else:
            # Identity projection
            self._projection = tf.keras.layers.Layer()
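
    # Hypothetical sketch, not part of the original excerpt: a call() along
    # these lines would tie the constructor pieces together. It embeds the
    # item ids, optionally adds the precomputed positional encoding, applies
    # the (possibly identity) projection, and lets a learned query attend over
    # the sequence. `self._query` is an assumed name for the trainable query
    # created by _reset_query_head(); masking and any final reshape to
    # output_dimension are omitted for brevity.
    def call(self, inputs):
        embedded = self.embedding(inputs)              # [batch, seq, emb]
        if self._use_positional_encoding:
            embedded += self._positional_encoding[:, :tf.shape(embedded)[1], :]
        projected = self._projection(embedded)
        # Tile the learned query (assumed shape [1, num_heads, emb]) so every
        # example in the batch attends with the same parametric query.
        query = tf.tile(self._query, [tf.shape(embedded)[0], 1, 1])
        return self._attention([query, projected])     # [batch, num_heads, emb]
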
  def __init__(self,
               num_mha_layers,
               num_mha_heads,
               mha_dropout,
               output_dimension,
               max_sequence_size,
               vocab_size,
               input_embedding_dimension,
               num_representations=1,
               use_positional_encoding=False,
               use_projection_layer=False,
               mask_zero=False):
    """Initializes the parameteric attention model.

    Args:
      num_mha_layers: The number of layers of Multi-Headed Attention (MHA).
      num_mha_heads: The number of heads to use for MHA.
      mha_dropout: Dropout for MHA.
      output_dimension: The output dimension of the user representation.
      max_sequence_size: The maximum size of the input sequence.
      vocab_size: The vocabulary size for input tokens/items.
      input_embedding_dimension: The embedding dimension for input tokens/items.
      num_representations: Number of output representations.
      use_positional_encoding: Whether positional encoding is applied or not.
      use_projection_layer: Whether to apply projection before using parametric
        attention.
      mask_zero: If true, uses zero in sequence as the mask.
    """

    super(ParametricAttentionEncoder, self).__init__()
    self._num_mha_layers = num_mha_layers
    self._num_mha_heads = num_mha_heads
    self._mha_dropout = mha_dropout
    self._input_embedding_dimension = input_embedding_dimension
    self._use_positional_encoding = use_positional_encoding
    self._positional_encoding = model_utils.positional_encoding(
        max_sequence_size, input_embedding_dimension)
    self._output_dimension = output_dimension
    self._attention = tf.keras.layers.Attention(use_scale=True)
    self._num_heads = num_representations
    self._mask_zero = mask_zero

    self.embedding = tf.keras.layers.Embedding(
        vocab_size,
        input_embedding_dimension,
        mask_zero=mask_zero,
        name="user_tower_item_input_embedding",
        embeddings_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.1, maxval=0.1))

    self._input_projection = tf.keras.layers.Dense(
        output_dimension, use_bias=True)

    self.reset_query_head()

    self._mha_layers = []
    for _ in range(num_mha_layers):
      self._mha_layers.append(
          tf.keras.layers.MultiHeadAttention(
              num_mha_heads,
              output_dimension,
              output_shape=output_dimension,
              dropout=mha_dropout))

    if use_projection_layer:
      self._output_projection = tf.keras.layers.Dense(
          output_dimension,
          use_bias=True,
          kernel_regularizer=tf.keras.regularizers.L2(0.001))
    else:
      # Identity projection
      self._output_projection = tf.keras.layers.Layer()
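
For orientation, here is a hedged usage sketch. It assumes ParametricAttentionEncoder is a Keras layer whose call() maps a batch of integer item-id sequences to num_representations user vectors of size output_dimension; the hyperparameter values and tensor shapes below are illustrative assumptions, not taken from the original configuration.

import tensorflow as tf

# Illustrative hyperparameters only.
encoder = ParametricAttentionEncoder(
    num_mha_layers=2,
    num_mha_heads=4,
    mha_dropout=0.1,
    output_dimension=64,
    max_sequence_size=50,
    vocab_size=10000,
    input_embedding_dimension=64,
    num_representations=3,
    use_positional_encoding=True,
    use_projection_layer=True,
    mask_zero=True)

# A batch of 8 padded item-id sequences (0 is the padding id when
# mask_zero=True).
item_ids = tf.random.uniform([8, 50], minval=1, maxval=10000, dtype=tf.int32)

# Assumed output shape: [8, num_representations, output_dimension].
user_representations = encoder(item_ids)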