def test_positional_encoding(self):
  position_embedding = model_utils.positional_encoding(
      max_seq_size=4, embedding_dim=2).numpy()
  expected_output = np.array([[0., 1.],
                              [0.84147096, 0.5403023],
                              [0.9092974, -0.41614684],
                              [0.14112, -0.9899925]])
  self.assertSequenceEqual(position_embedding.shape, [1, 4, 2])
  np.testing.assert_array_almost_equal(position_embedding[0], expected_output)
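# A minimal sketch of the sinusoidal encoding the test above assumes
# `model_utils.positional_encoding` to produce (the standard formulation from
# "Attention Is All You Need"); the actual helper may differ in details such as
# dtype or base frequency. The function name below is illustrative only.
import numpy as np
import tensorflow as tf


def positional_encoding_sketch(max_seq_size, embedding_dim):
  """Returns a [1, max_seq_size, embedding_dim] sinusoidal position encoding."""
  positions = np.arange(max_seq_size)[:, np.newaxis]      # [seq, 1]
  dims = np.arange(embedding_dim)[np.newaxis, :]          # [1, dim]
  # Each pair of dimensions shares one frequency: 1 / 10000^(2i / d).
  angle_rates = 1.0 / np.power(
      10000.0, (2 * (dims // 2)) / np.float32(embedding_dim))
  angles = positions * angle_rates                        # [seq, dim]
  angles[:, 0::2] = np.sin(angles[:, 0::2])               # even dims: sine
  angles[:, 1::2] = np.cos(angles[:, 1::2])               # odd dims: cosine
  return tf.cast(angles[np.newaxis, ...], tf.float32)     # [1, seq, dim]


# With max_seq_size=4 and embedding_dim=2 this yields rows [sin(p), cos(p)]
# for p = 0..3, matching expected_output in the test above.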
def __init__(self,
             output_dimension,
             max_sequence_size,
             vocab_size,
             input_embedding_dimension,
             num_representations=1,
             use_positional_encoding=False,
             use_projection_layer=False,
             mask_zero=False):
  """Initializes the parametric attention model.

  Args:
    output_dimension: The output dimension of the user representation.
    max_sequence_size: The maximum size of the input sequence.
    vocab_size: The vocabulary size for input tokens/items.
    input_embedding_dimension: The embedding dimension for input tokens/items.
    num_representations: Number of output representations.
    use_positional_encoding: Whether positional encoding is applied.
    use_projection_layer: Whether to apply a projection before the parametric
      attention. Used for the SUR model to increase the number of parameters.
    mask_zero: If true, treats zero in the sequence as the mask value.
  """
  super(SimpleParametricAttention, self).__init__()
  self._input_embedding_dimension = input_embedding_dimension
  self._use_positional_encoding = use_positional_encoding
  self._positional_encoding = model_utils.positional_encoding(
      max_sequence_size, input_embedding_dimension)
  self._output_dimension = output_dimension
  self._attention = tf.keras.layers.Attention(use_scale=True)
  self._num_heads = num_representations
  self._mask_zero = mask_zero
  self._reset_query_head()
  self.embedding = tf.keras.layers.Embedding(
      vocab_size,
      input_embedding_dimension,
      mask_zero=mask_zero,
      embeddings_initializer='normal')
  if use_projection_layer and self._num_heads == 1:
    # Linear layer for the SUR model (to increase num_parameters).
    self._projection = tf.keras.layers.Dense(
        self._input_embedding_dimension, use_bias=False)
  else:
    # Identity projection.
    self._projection = tf.keras.layers.Layer()
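# A minimal sketch of the parametric-attention pooling that the layer above
# sets up: a learned query head attending over the sequence of item embeddings
# via `tf.keras.layers.Attention`. The names, shapes, and the explicit query
# variable here are illustrative assumptions, not this class's actual
# `_reset_query_head` / call() implementation.
import tensorflow as tf

embedding_dim = 8
num_representations = 1
batch_size, seq_len = 4, 10

# One learned query vector per output representation.
query_head = tf.Variable(
    tf.random.normal([1, num_representations, embedding_dim]))
attention = tf.keras.layers.Attention(use_scale=True)

# item_embeddings: [batch, seq_len, embedding_dim] sequence of item vectors,
# e.g. the output of `self.embedding` (random here for illustration).
item_embeddings = tf.random.normal([batch_size, seq_len, embedding_dim])
batch_query = tf.tile(query_head, [batch_size, 1, 1])

# The learned query attends over the item sequence and pools it into
# [batch, num_representations, embedding_dim] user representations.
user_representations = attention([batch_query, item_embeddings])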
def __init__(self,
             num_mha_layers,
             num_mha_heads,
             mha_dropout,
             output_dimension,
             max_sequence_size,
             vocab_size,
             input_embedding_dimension,
             num_representations=1,
             use_positional_encoding=False,
             use_projection_layer=False,
             mask_zero=False):
  """Initializes the parametric attention model.

  Args:
    num_mha_layers: The number of layers of Multi-Headed Attention (MHA).
    num_mha_heads: The number of heads to use for MHA.
    mha_dropout: Dropout rate for MHA.
    output_dimension: The output dimension of the user representation.
    max_sequence_size: The maximum size of the input sequence.
    vocab_size: The vocabulary size for input tokens/items.
    input_embedding_dimension: The embedding dimension for input tokens/items.
    num_representations: Number of output representations.
    use_positional_encoding: Whether positional encoding is applied.
    use_projection_layer: Whether to apply a projection before the parametric
      attention.
    mask_zero: If true, treats zero in the sequence as the mask value.
  """
  super(ParametricAttentionEncoder, self).__init__()
  self._num_mha_layers = num_mha_layers
  self._num_mha_heads = num_mha_heads
  self._mha_dropout = mha_dropout
  self._input_embedding_dimension = input_embedding_dimension
  self._use_positional_encoding = use_positional_encoding
  self._positional_encoding = model_utils.positional_encoding(
      max_sequence_size, input_embedding_dimension)
  self._output_dimension = output_dimension
  self._attention = tf.keras.layers.Attention(use_scale=True)
  self._num_heads = num_representations
  self._mask_zero = mask_zero
  self.embedding = tf.keras.layers.Embedding(
      vocab_size,
      input_embedding_dimension,
      mask_zero=mask_zero,
      name="user_tower_item_input_embedding",
      embeddings_initializer=tf.keras.initializers.RandomUniform(
          minval=-0.1, maxval=0.1))
  self._input_projection = tf.keras.layers.Dense(
      output_dimension, use_bias=True)
  self.reset_query_head()
  self._mha_layers = []
  for _ in range(num_mha_layers):
    self._mha_layers.append(
        tf.keras.layers.MultiHeadAttention(
            num_mha_heads,
            output_dimension,
            output_shape=output_dimension,
            dropout=mha_dropout))
  if use_projection_layer:
    self._output_projection = tf.keras.layers.Dense(
        output_dimension,
        use_bias=True,
        kernel_regularizer=tf.keras.regularizers.L2(0.001))
  else:
    # Identity projection.
    self._output_projection = tf.keras.layers.Layer()
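# A hypothetical sketch of how the pieces initialized above could be wired
# together in a forward pass (embedding -> positional encoding -> input
# projection -> stacked self-attention -> output projection). This is NOT the
# repository's call() implementation; in particular the learned query heads
# set up by `reset_query_head` and pooled via `self._attention` are omitted.
import tensorflow as tf


def _forward_sketch(encoder, item_ids):
  """Illustrative data flow for a ParametricAttentionEncoder instance."""
  # item_ids: [batch, seq_len] integer item/token ids.
  x = encoder.embedding(item_ids)                   # [batch, seq, emb_dim]
  if encoder._use_positional_encoding:
    x = x + encoder._positional_encoding[:, :tf.shape(x)[1], :]
  x = encoder._input_projection(x)                  # [batch, seq, out_dim]
  for mha in encoder._mha_layers:
    x = mha(query=x, value=x, key=x)                # self-attention block
  return encoder._output_projection(x)              # [batch, seq, out_dim]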